diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2015-02-19 20:55:17 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-02-19 20:55:17 +0000 | 
| commit | 8af9f2019d565de6161a3ce884a16942fe2dde29 (patch) | |
| tree | 16dc2c22ced0ee00053811c657e52dead9db406d /lib/Transforms | |
| parent | 608e665946afc2b89050fcf0b99070db2c006bee (diff) | |
Notes
Diffstat (limited to 'lib/Transforms')
| -rw-r--r-- | lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 10 | ||||
| -rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 62 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 2 | 
3 files changed, 47 insertions, 27 deletions
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 8509713b3367..1f73cbc4ac30 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -403,7 +403,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,                                                        const Instruction& End,                                                        AliasAnalysis::Location                                                        Loc) { -  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref); +  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);  }  /// @@ -414,6 +414,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,  StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,                                                     StoreInst *Store0) {    DEBUG(dbgs() << "can Sink? 
: "; Store0->dump(); dbgs() << "\n"); +  BasicBlock *BB0 = Store0->getParent();    for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();         RBI != RBE; ++RBI) {      Instruction *Inst = &*RBI; @@ -422,13 +423,14 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,         continue;      StoreInst *Store1 = cast<StoreInst>(Inst); -    BasicBlock *BB0 = Store0->getParent();      AliasAnalysis::Location Loc0 = AA->getLocation(Store0);      AliasAnalysis::Location Loc1 = AA->getLocation(Store1);      if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && -      !isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) && -      !isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) { +      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), +                                 BB1->back(), Loc1) && +      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))), +                                 BB0->back(), Loc0)) {        return Store1;      }    } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index f12cd61d463a..8a32215a299f 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -55,7 +55,7 @@ STATISTIC(NumRuntimeUnrolled,  /// - Branch around the original loop if the trip count is less  ///   than the unroll factor.  
/// -static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, +static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,                            BasicBlock *LastPrologBB, BasicBlock *PrologEnd,                            BasicBlock *OrigPH, BasicBlock *NewPH,                            ValueToValueMapTy &VMap, Pass *P) { @@ -105,12 +105,19 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,      }    } -  // Create a branch around the orignal loop, which is taken if the -  // trip count is less than the unroll factor. +  // Create a branch around the original loop, which is taken if there are no +  // iterations remaining to be executed after running the prologue.    Instruction *InsertPt = PrologEnd->getTerminator(); + +  assert(Count != 0 && "nonsensical Count!"); + +  // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1) +  // (since Count is a power of 2).  This means %xtraiter is (BECount + 1) and +  // all of the iterations of this loop were executed by the prologue.  Note +  // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.    
Instruction *BrLoopExit = -    new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, -                 ConstantInt::get(TripCount->getType(), Count)); +    new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount, +                 ConstantInt::get(BECount->getType(), Count - 1));    BasicBlock *Exit = L->getUniqueExitBlock();    assert(Exit && "Loop must have a single exit block only");    // Split the exit to maintain loop canonicalization guarantees @@ -292,23 +299,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,    // Only unroll loops with a computable trip count and the trip count needs    // to be an int value (allowing a pointer type is a TODO item) -  const SCEV *BECount = SE->getBackedgeTakenCount(L); -  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) +  const SCEV *BECountSC = SE->getBackedgeTakenCount(L); +  if (isa<SCEVCouldNotCompute>(BECountSC) || +      !BECountSC->getType()->isIntegerTy())      return false; -  // If BECount is INT_MAX, we can't compute trip-count without overflow. -  if (BECount->isAllOnesValue()) -    return false; +  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();    // Add 1 since the backedge count doesn't include the first loop iteration    const SCEV *TripCountSC = -    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); +    SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));    if (isa<SCEVCouldNotCompute>(TripCountSC))      return false;    // We only handle cases when the unroll factor is a power of 2.    // Count is the loop unroll factor, the number of extra copies added + 1. -  if ((Count & (Count-1)) != 0) +  if (!isPowerOf2_32(Count)) +    return false; + +  // This constraint lets us deal with an overflowing trip count easily; see the +  // comment on ModVal below.  This check is equivalent to `Log2(Count) <= +  // BEWidth`. 
+  if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))      return false;    // If this loop is nested, then the loop unroller changes the code in @@ -330,16 +342,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,    SCEVExpander Expander(*SE, "loop-unroll");    Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),                                              PreHeaderBR); +  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), +                                          PreHeaderBR);    IRBuilder<> B(PreHeaderBR);    Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); -  // Check if for no extra iterations, then jump to cloned/unrolled loop. -  // We have to check that the trip count computation didn't overflow when -  // adding one to the backedge taken count. -  Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); -  Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); -  Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); +  // If ModVal is zero, we know that either +  //  1. there are no iterations to be run in the prologue loop +  // OR +  //  2. the addition computing TripCount overflowed +  // +  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the +  // number of iterations that remain to be run in the original loop is a +  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we +  // explicitly check this above). 
+ +  Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");    // Branch to either the extra iterations or the cloned/unrolled loop    // We will fix up the true branch label when adding loop body copies @@ -362,10 +381,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,    std::vector<BasicBlock *> NewBlocks;    ValueToValueMapTy VMap; -  // If unroll count is 2 and we can't overflow in tripcount computation (which -  // is BECount + 1), then we don't need a loop for prologue, and we can unroll -  // it. We can be sure that we don't overflow only if tripcount is a constant. -  bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount)); +  bool UnrollPrologue = Count == 2;    // Clone all the basic blocks in the loop. If Count is 2, we don't clone    // the loop, otherwise we create a cloned loop to execute the extra @@ -391,7 +407,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,    // Connect the prolog code to the original loop and update the    // PHI functions.    BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]); -  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, +  ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,                  LPM->getAsPass());    NumRuntimeUnrolled++;    return true; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 557304ed56c5..47b92a37cf1d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1874,6 +1874,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {          // wide store needs to start at the last vector element.          
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));          PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); +        Mask[Part] = reverseVector(Mask[Part]);        }        Value *VecPtr = Builder.CreateBitCast(PartPtr, @@ -1902,6 +1903,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {        // wide load needs to start at the last vector element.        PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));        PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); +      Mask[Part] = reverseVector(Mask[Part]);      }      Instruction* NewLI;  | 
