author     Dimitry Andric <dim@FreeBSD.org>   2018-07-30 16:33:32 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2018-07-30 16:33:32 +0000
commit     51315c45ff5643a27f9c84b816db54ee870ba29b (patch)
tree       1d87443fa0e53d3e6b315ce25787e64be0906bf7 /contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
parent     6dfd050075216be8538ae375a22d30db72916f7e (diff)
parent     eb11fae6d08f479c0799db45860a98af528fa6e7 (diff)
Diffstat (limited to 'contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp')
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp  298
1 file changed, 209 insertions, 89 deletions
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index dc83b6d4d292..5f3d127202ad 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -6,6 +6,38 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
+// This pass merges loads/stores to/from sequential memory addresses into vector
+// loads/stores. Although there's nothing GPU-specific in here, this pass is
+// motivated by the microarchitectural quirks of nVidia and AMD GPUs.
+//
+// (For simplicity below we talk about loads only, but everything also applies
+// to stores.)
+//
+// This pass is intended to be run late in the pipeline, after other
+// vectorization opportunities have been exploited. So the assumption here is
+// that immediately following our new vector load we'll need to extract out the
+// individual elements of the load, so we can operate on them individually.
+//
+// On CPUs this transformation is usually not beneficial, because extracting the
+// elements of a vector register is expensive on most architectures. It's
+// usually better just to load each element individually into its own scalar
+// register.
+//
+// However, nVidia and AMD GPUs don't have proper vector registers. Instead, a
+// "vector load" loads directly into a series of scalar registers. In effect,
+// extracting the elements of the vector is free. It's therefore always
+// beneficial to vectorize a sequence of loads on these architectures.
+//
+// Vectorizing (perhaps a better name might be "coalescing") loads can have
+// large performance impacts on GPU kernels, and opportunities for vectorizing
+// are common in GPU code. This pass tries very hard to find such
+// opportunities; its runtime is quadratic in the number of loads in a BB.
+//
+// Some CPU architectures, such as ARM, have instructions that load into
+// multiple scalar registers, similar to a GPU vectorized load. In theory ARM
+// could use this pass (with some modifications), but currently it implements
+// its own pass to do something similar to what we do here.
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -21,6 +53,7 @@
 #include "llvm/Analysis/OrderedBasicBlock.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Attributes.h"
@@ -45,7 +78,6 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Vectorize.h"
 #include <algorithm>
 #include <cassert>
@@ -65,8 +97,16 @@ static const unsigned StackAdjustedAlignment = 4;
 
 namespace {
 
+/// ChainID is an arbitrary token that is allowed to be different only for the
+/// accesses that are guaranteed to be considered non-consecutive by
+/// Vectorizer::isConsecutiveAccess. It's used for grouping instructions
+/// together and reducing the number of instructions the main search operates on
+/// at a time, i.e. this is to reduce compile time and nothing else as the main
+/// search has O(n^2) time complexity. The underlying type of ChainID should not
+/// be relied upon.
+using ChainID = const Value *;
 using InstrList = SmallVector<Instruction *, 8>;
-using InstrListMap = MapVector<Value *, InstrList>;
+using InstrListMap = MapVector<ChainID, InstrList>;
 
 class Vectorizer {
   Function &F;
@@ -86,10 +126,6 @@ public:
   bool run();
 
 private:
-  Value *getPointerOperand(Value *I) const;
-
-  GetElementPtrInst *getSourceGEP(Value *Src) const;
-
   unsigned getPointerAddressSpace(Value *I);
 
   unsigned getAlignment(LoadInst *LI) const {
@@ -108,7 +144,15 @@ private:
     return DL.getABITypeAlignment(SI->getValueOperand()->getType());
   }
 
+  static const unsigned MaxDepth = 3;
+
   bool isConsecutiveAccess(Value *A, Value *B);
+  bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+                              unsigned Depth = 0) const;
+  bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
+                                   unsigned Depth) const;
+  bool lookThroughSelects(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+                          unsigned Depth) const;
 
   /// After vectorization, reorder the instructions that I depends on
   /// (the instructions defining its operands), to ensure they dominate I.
@@ -239,14 +283,6 @@ bool Vectorizer::run() {
   return Changed;
 }
 
-Value *Vectorizer::getPointerOperand(Value *I) const {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->getPointerOperand();
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getPointerOperand();
-  return nullptr;
-}
-
 unsigned Vectorizer::getPointerAddressSpace(Value *I) {
   if (LoadInst *L = dyn_cast<LoadInst>(I))
     return L->getPointerAddressSpace();
@@ -255,23 +291,10 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) {
   return -1;
 }
 
-GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
-  // First strip pointer bitcasts. Make sure pointee size is the same with
-  // and without casts.
-  // TODO: a stride set by the add instruction below can match the difference
-  // in pointee type size here. Currently it will not be vectorized.
-  Value *SrcPtr = getPointerOperand(Src);
-  Value *SrcBase = SrcPtr->stripPointerCasts();
-  if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
-      DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
-    SrcPtr = SrcBase;
-  return dyn_cast<GetElementPtrInst>(SrcPtr);
-}
-
 // FIXME: Merge with llvm::isConsecutiveAccess
 bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
-  Value *PtrA = getPointerOperand(A);
-  Value *PtrB = getPointerOperand(B);
+  Value *PtrA = getLoadStorePointerOperand(A);
+  Value *PtrB = getLoadStorePointerOperand(B);
   unsigned ASA = getPointerAddressSpace(A);
   unsigned ASB = getPointerAddressSpace(B);
 
@@ -280,18 +303,27 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
     return false;
 
   // Make sure that A and B are different pointers of the same size type.
-  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
   Type *PtrATy = PtrA->getType()->getPointerElementType();
   Type *PtrBTy = PtrB->getType()->getPointerElementType();
   if (PtrA == PtrB ||
+      PtrATy->isVectorTy() != PtrBTy->isVectorTy() ||
       DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
       DL.getTypeStoreSize(PtrATy->getScalarType()) !=
          DL.getTypeStoreSize(PtrBTy->getScalarType()))
    return false;
 
+  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
   APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
 
-  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+  return areConsecutivePointers(PtrA, PtrB, Size);
+}
+
+bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
+                                        const APInt &PtrDelta,
+                                        unsigned Depth) const {
+  unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
+  APInt OffsetA(PtrBitWidth, 0);
+  APInt OffsetB(PtrBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
 
@@ -300,11 +332,11 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
   // Check if they are based on the same pointer. That makes the offsets
   // sufficient.
   if (PtrA == PtrB)
-    return OffsetDelta == Size;
+    return OffsetDelta == PtrDelta;
 
   // Compute the necessary base pointer delta to have the necessary final delta
-  // equal to the size.
-  APInt BaseDelta = Size - OffsetDelta;
+  // equal to the pointer delta requested.
+  APInt BaseDelta = PtrDelta - OffsetDelta;
 
   // Compute the distance with SCEV between the base pointers.
   const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
@@ -314,71 +346,127 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
   if (X == PtrSCEVB)
     return true;
 
+  // The above check will not catch the cases where one of the pointers is
+  // factorized but the other one is not, such as (C + (S * (A + B))) vs
+  // (AS + BS). Get the minus scev. That will allow re-combining the expresions
+  // and getting the simplified difference.
+  const SCEV *Dist = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
+  if (C == Dist)
+    return true;
+
   // Sometimes even this doesn't work, because SCEV can't always see through
   // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
   // things the hard way.
+  return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth);
+}
+
+bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
+                                             APInt PtrDelta,
+                                             unsigned Depth) const {
+  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
+  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
+  if (!GEPA || !GEPB)
+    return lookThroughSelects(PtrA, PtrB, PtrDelta, Depth);
 
   // Look through GEPs after checking they're the same except for the last
   // index.
-  GetElementPtrInst *GEPA = getSourceGEP(A);
-  GetElementPtrInst *GEPB = getSourceGEP(B);
-  if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
+  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
+      GEPA->getPointerOperand() != GEPB->getPointerOperand())
     return false;
-  unsigned FinalIndex = GEPA->getNumOperands() - 1;
-  for (unsigned i = 0; i < FinalIndex; i++)
-    if (GEPA->getOperand(i) != GEPB->getOperand(i))
+  gep_type_iterator GTIA = gep_type_begin(GEPA);
+  gep_type_iterator GTIB = gep_type_begin(GEPB);
+  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
+    if (GTIA.getOperand() != GTIB.getOperand())
      return false;
+    ++GTIA;
+    ++GTIB;
+  }
 
-  Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
-  Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
+  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
+  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
   if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
       OpA->getType() != OpB->getType())
     return false;
 
+  if (PtrDelta.isNegative()) {
+    if (PtrDelta.isMinSignedValue())
+      return false;
+    PtrDelta.negate();
+    std::swap(OpA, OpB);
+  }
+  uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
+  if (PtrDelta.urem(Stride) != 0)
+    return false;
+  unsigned IdxBitWidth = OpA->getType()->getScalarSizeInBits();
+  APInt IdxDiff = PtrDelta.udiv(Stride).zextOrSelf(IdxBitWidth);
+
   // Only look through a ZExt/SExt.
   if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
     return false;
 
   bool Signed = isa<SExtInst>(OpA);
 
-  OpA = dyn_cast<Instruction>(OpA->getOperand(0));
+  // At this point A could be a function parameter, i.e. not an instruction
+  Value *ValA = OpA->getOperand(0);
   OpB = dyn_cast<Instruction>(OpB->getOperand(0));
-  if (!OpA || !OpB || OpA->getType() != OpB->getType())
+  if (!OpB || ValA->getType() != OpB->getType())
    return false;
 
-  // Now we need to prove that adding 1 to OpA won't overflow.
+  // Now we need to prove that adding IdxDiff to ValA won't overflow.
   bool Safe = false;
-  // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
-  // we're okay.
+  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
+  // ValA, we're okay.
   if (OpB->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(OpB->getOperand(1)) &&
-      cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
+      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue())) {
    if (Signed)
      Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
    else
      Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
  }
 
-  unsigned BitWidth = OpA->getType()->getScalarSizeInBits();
+  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();
 
   // Second attempt:
-  // If any bits are known to be zero other than the sign bit in OpA, we can
-  // add 1 to it while guaranteeing no overflow of any sort.
+  // If all set bits of IdxDiff or any higher order bit other than the sign bit
+  // are known to be zero in ValA, we can add Diff to it while guaranteeing no
+  // overflow of any sort.
   if (!Safe) {
+    OpA = dyn_cast<Instruction>(ValA);
+    if (!OpA)
+      return false;
    KnownBits Known(BitWidth);
    computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
-    if (Known.countMaxTrailingOnes() < (BitWidth - 1))
-      Safe = true;
+    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
+    if (Signed)
+      BitsAllowedToBeSet.clearBit(BitWidth - 1);
+    if (BitsAllowedToBeSet.ult(IdxDiff))
+      return false;
  }
 
-  if (!Safe)
+  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
+  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
+  const SCEV *C = SE.getConstant(IdxDiff.trunc(BitWidth));
+  const SCEV *X = SE.getAddExpr(OffsetSCEVA, C);
+  return X == OffsetSCEVB;
+}
+
+bool Vectorizer::lookThroughSelects(Value *PtrA, Value *PtrB,
+                                    const APInt &PtrDelta,
+                                    unsigned Depth) const {
+  if (Depth++ == MaxDepth)
    return false;
 
-  const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
-  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
-  const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
-  const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
-  return X2 == OffsetSCEVB;
+  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
+    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
+      return SelectA->getCondition() == SelectB->getCondition() &&
+             areConsecutivePointers(SelectA->getTrueValue(),
                                     SelectB->getTrueValue(), PtrDelta, Depth) &&
+             areConsecutivePointers(SelectA->getFalseValue(),
                                     SelectB->getFalseValue(), PtrDelta, Depth);
+    }
+  }
+  return false;
 }
 
 void Vectorizer::reorder(Instruction *I) {
@@ -448,7 +536,7 @@ Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
 void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
   SmallVector<Instruction *, 16> Instrs;
   for (Instruction *I : Chain) {
-    Value *PtrOperand = getPointerOperand(I);
+    Value *PtrOperand = getLoadStorePointerOperand(I);
     assert(PtrOperand && "Instruction must have a pointer operand.");
     Instrs.push_back(I);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
@@ -484,7 +572,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
   SmallVector<Instruction *, 16> ChainInstrs;
 
   bool IsLoadChain = isa<LoadInst>(Chain[0]);
-  DEBUG({
+  LLVM_DEBUG({
     for (Instruction *I : Chain) {
       if (IsLoadChain)
         assert(isa<LoadInst>(I) &&
@@ -506,11 +594,12 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
                Intrinsic::sideeffect) {
       // Ignore llvm.sideeffect calls.
     } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
-      DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
+      LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
+                        << '\n');
       break;
     } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
-      DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
-            << '\n');
+      LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
+                        << '\n');
       break;
     }
   }
@@ -536,32 +625,40 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
     if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
       break;
 
-    if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
+    auto *MemLoad = dyn_cast<LoadInst>(MemInstr);
+    auto *ChainLoad = dyn_cast<LoadInst>(ChainInstr);
+    if (MemLoad && ChainLoad)
      continue;
 
+    // We can ignore the alias if the we have a load store pair and the load
+    // is known to be invariant. The load cannot be clobbered by the store.
+    auto IsInvariantLoad = [](const LoadInst *LI) -> bool {
+      return LI->getMetadata(LLVMContext::MD_invariant_load);
+    };
+
     // We can ignore the alias as long as the load comes before the store,
     // because that means we won't be moving the load past the store to
     // vectorize it (the vectorized load is inserted at the location of the
     // first load in the chain).
-    if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
-        OBB.dominates(ChainInstr, MemInstr))
+    if (isa<StoreInst>(MemInstr) && ChainLoad &&
+        (IsInvariantLoad(ChainLoad) || OBB.dominates(ChainLoad, MemInstr)))
      continue;
 
     // Same case, but in reverse.
-    if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
-        OBB.dominates(MemInstr, ChainInstr))
+    if (MemLoad && isa<StoreInst>(ChainInstr) &&
+        (IsInvariantLoad(MemLoad) || OBB.dominates(MemLoad, ChainInstr)))
      continue;
 
     if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
                       MemoryLocation::get(ChainInstr))) {
-      DEBUG({
+      LLVM_DEBUG({
        dbgs() << "LSV: Found alias:\n"
                  "  Aliasing instruction and pointer:\n"
               << "  " << *MemInstr << '\n'
-               << "  " << *getPointerOperand(MemInstr) << '\n'
+               << "  " << *getLoadStorePointerOperand(MemInstr) << '\n'
               << "  Aliased instruction and pointer:\n"
               << "  " << *ChainInstr << '\n'
-               << "  " << *getPointerOperand(ChainInstr) << '\n';
+               << "  " << *getLoadStorePointerOperand(ChainInstr) << '\n';
      });
      // Save this aliasing memory instruction as a barrier, but allow other
      // instructions that precede the barrier to be vectorized with this one.
@@ -594,6 +691,20 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
   return Chain.slice(0, ChainIdx);
 }
 
+static ChainID getChainID(const Value *Ptr, const DataLayout &DL) {
+  const Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
+  if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
+    // The select's themselves are distinct instructions even if they share the
+    // same condition and evaluate to consecutive pointers for true and false
+    // values of the condition. Therefore using the select's themselves for
+    // grouping instructions would put consecutive accesses into different lists
+    // and they won't be even checked for being consecutive, and won't be
+    // vectorized.
+    return Sel->getCondition();
+  }
+  return ObjPtr;
+}
+
 std::pair<InstrListMap, InstrListMap>
 Vectorizer::collectInstructions(BasicBlock *BB) {
   InstrListMap LoadRefs;
@@ -632,8 +743,12 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
       unsigned AS = Ptr->getType()->getPointerAddressSpace();
       unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
 
+      unsigned VF = VecRegSize / TySize;
+      VectorType *VecTy = dyn_cast<VectorType>(Ty);
+
       // No point in looking at these if they're too big to vectorize.
-      if (TySize > VecRegSize / 2)
+      if (TySize > VecRegSize / 2 ||
+          (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
        continue;
 
       // Make sure all the users of a vector are constant-index extracts.
@@ -644,8 +759,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
        continue;
 
      // Save the load locations.
-      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
-      LoadRefs[ObjPtr].push_back(LI);
+      const ChainID ID = getChainID(Ptr, DL);
+      LoadRefs[ID].push_back(LI);
     } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
       if (!SI->isSimple())
         continue;
@@ -675,8 +790,12 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
       unsigned AS = Ptr->getType()->getPointerAddressSpace();
       unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
 
+      unsigned VF = VecRegSize / TySize;
+      VectorType *VecTy = dyn_cast<VectorType>(Ty);
+
       // No point in looking at these if they're too big to vectorize.
-      if (TySize > VecRegSize / 2)
+      if (TySize > VecRegSize / 2 ||
+          (VecTy && TTI.getStoreVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
        continue;
 
       if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
@@ -686,8 +805,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
        continue;
 
      // Save store location.
-      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
-      StoreRefs[ObjPtr].push_back(SI);
+      const ChainID ID = getChainID(Ptr, DL);
+      StoreRefs[ID].push_back(SI);
     }
   }
 
@@ -697,12 +816,12 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
 bool Vectorizer::vectorizeChains(InstrListMap &Map) {
   bool Changed = false;
 
-  for (const std::pair<Value *, InstrList> &Chain : Map) {
+  for (const std::pair<ChainID, InstrList> &Chain : Map) {
     unsigned Size = Chain.second.size();
     if (Size < 2)
       continue;
 
-    DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
+    LLVM_DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
 
     // Process the stores in chunks of 64.
     for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
@@ -716,7 +835,8 @@ bool Vectorizer::vectorizeChains(InstrListMap &Map) {
 }
 
 bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
-  DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
+  LLVM_DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size()
+                    << " instructions.\n");
 
   SmallVector<int, 16> Heads, Tails;
   int ConsecutiveChain[64];
@@ -852,14 +972,14 @@ bool Vectorizer::vectorizeStoreChain(
   // vector factor, break it into two pieces.
   unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
   if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
-    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
-                    " Creating two separate arrays.\n");
+    LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
+                         " Creating two separate arrays.\n");
    return vectorizeStoreChain(Chain.slice(0, TargetVF),
                               InstructionsProcessed) |
           vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
  }
 
-  DEBUG({
+  LLVM_DEBUG({
    dbgs() << "LSV: Stores to vectorize:\n";
    for (Instruction *I : Chain)
      dbgs() << "  " << *I << "\n";
@@ -1000,8 +1120,8 @@ bool Vectorizer::vectorizeLoadChain(
   // vector factor, break it into two pieces.
   unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
   if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
-    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
-                    " Creating two separate arrays.\n");
+    LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
+                         " Creating two separate arrays.\n");
    return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
           vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
  }
@@ -1024,7 +1144,7 @@ bool Vectorizer::vectorizeLoadChain(
    Alignment = NewAlign;
  }
 
-  DEBUG({
+  LLVM_DEBUG({
    dbgs() << "LSV: Loads to vectorize:\n";
    for (Instruction *I : Chain)
      I->dump();
@@ -1107,7 +1227,7 @@ bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
                                                   SzInBytes * 8, AddressSpace,
                                                   Alignment, &Fast);
-  DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
-        << " and fast? " << Fast << "\n";);
+  LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
+                    << " and fast? " << Fast << "\n";);
  return !Allows || !Fast;
 }