diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
commit | 581a6d8501ff5614297da837b81ed3b6956361ea (patch) | |
tree | 985ee91d0ca1d3e6506ac5ff7e37f5b67adfec09 /lib/Transforms | |
parent | 909545a822eef491158f831688066f0ec2866938 (diff) | |
download | src-test2-581a6d8501ff5614297da837b81ed3b6956361ea.tar.gz src-test2-581a6d8501ff5614297da837b81ed3b6956361ea.zip |
Notes
Diffstat (limited to 'lib/Transforms')
36 files changed, 576 insertions, 376 deletions
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 82daf754be0d..deb7e819480b 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -270,12 +270,12 @@ class LowerTypeTestsModule { /// relative to the start address. Constant *AlignLog2; - /// ByteArray, Inline, AllOnes: size of the memory region covering members - /// of this type identifier as a multiple of 2^AlignLog2. - Constant *Size; + /// ByteArray, Inline, AllOnes: one less than the size of the memory region + /// covering members of this type identifier as a multiple of 2^AlignLog2. + Constant *SizeM1; - /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width. - unsigned SizeBitWidth; + /// ByteArray, Inline, AllOnes: range of SizeM1 expressed as a bit width. + unsigned SizeM1BitWidth; /// ByteArray: the byte array to test the address against. Constant *TheByteArray; @@ -593,8 +593,8 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, IntPtrTy)); Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); - Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy); - Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); + Constant *BitSizeConst = ConstantExpr::getZExt(TIL.SizeM1, IntPtrTy); + Value *OffsetInRange = B.CreateICmpULE(BitOffset, BitSizeConst); // If the bit set is all ones, testing against it is unnecessary. if (TIL.TheKind == TypeTestResolution::AllOnes) @@ -711,13 +711,13 @@ void LowerTypeTestsModule::lowerTypeTestCalls( if (BSI.isAllOnes()) { TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single : TypeTestResolution::AllOnes; - TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32; - TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty, - BSI.BitSize); + TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32; + TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty, + BSI.BitSize - 1); } else if (BSI.BitSize <= 64) { TIL.TheKind = TypeTestResolution::Inline; - TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6; - TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize); + TIL.SizeM1BitWidth = (BSI.BitSize <= 32) ? 5 : 6; + TIL.SizeM1 = ConstantInt::get(Int8Ty, BSI.BitSize - 1); uint64_t InlineBits = 0; for (auto Bit : BSI.Bits) InlineBits |= uint64_t(1) << Bit; @@ -728,9 +728,9 @@ void LowerTypeTestsModule::lowerTypeTestCalls( (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits); } else { TIL.TheKind = TypeTestResolution::ByteArray; - TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32; - TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty, - BSI.BitSize); + TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32; + TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty, + BSI.BitSize - 1); ++NumByteArraysCreated; ByteArrayInfo *BAI = createByteArray(BSI); TIL.TheByteArray = BAI->ByteArray; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 55151c13b430..2d34c1cc74bd 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1371,15 +1371,9 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); - if (isa<Constant>(RHS)) { - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - - if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) - if (Instruction *NV = FoldOpIntoSelect(I, SI)) - return NV; - } + if (isa<Constant>(RHS)) + if (Instruction *FoldedFAdd = foldOpWithConstantIntoOperand(I)) + return FoldedFAdd; // -A + B --> B - A // -A + -B --> -(A + B) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a59b43d6af5f..da5384a86aac 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1382,13 +1382,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) + return FoldedLogic; } if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) @@ -2125,14 +2120,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Builder->getInt(C1->getValue() & ~RHS->getValue())); } - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) + return FoldedLogic; } // Given an OR instruction, check to see if this is a bswap. @@ -2594,13 +2583,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) + return FoldedLogic; } BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index b29ed3c87451..2ef82ba3ed8c 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1599,21 +1599,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // fma fneg(x), fneg(y), z -> fma x, y, z if (match(Src0, m_FNeg(m_Value(LHS))) && match(Src1, m_FNeg(m_Value(RHS)))) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), - {LHS, RHS, II->getArgOperand(2)}); - NewCall->takeName(II); - NewCall->copyFastMathFlags(II); - return replaceInstUsesWith(*II, NewCall); + II->setArgOperand(0, LHS); + II->setArgOperand(1, RHS); + return II; } // fma fabs(x), fabs(x), z -> fma x, x, z if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) && match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), - {LHS, LHS, II->getArgOperand(2)}); - NewCall->takeName(II); - NewCall->copyFastMathFlags(II); - return replaceInstUsesWith(*II, NewCall); + II->setArgOperand(0, LHS); + II->setArgOperand(1, RHS); + return II; } // fma x, 1, z -> fadd x, z @@ -2760,6 +2756,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (KnownOne.isAllOnesValue()) return eraseInstFromFunction(*II); + // Update the cache of affected values for this assumption (we might be + // here because we just simplified the condition). + AC.updateAffectedValues(II); break; } case Intrinsic::experimental_gc_relocate: { diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 3cefe715e567..2847ce858e79 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -320,7 +320,6 @@ private: Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices); - Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// Classify whether a cast is worth optimizing. /// @@ -537,13 +536,21 @@ private: Value *SimplifyVectorOp(BinaryOperator &Inst); Value *SimplifyBSwap(BinaryOperator &Inst); - // FoldOpIntoPhi - Given a binary operator, cast instruction, or select - // which has a PHI node as operand #0, see if we can fold the instruction - // into the PHI (which is only possible if all operands to the PHI are - // constants). - // + + /// Given a binary operator, cast instruction, or select which has a PHI node + /// as operand #0, see if we can fold the instruction into the PHI (which is + /// only possible if all operands to the PHI are constants). Instruction *FoldOpIntoPhi(Instruction &I); + /// Given an instruction with a select as one operand and a constant as the + /// other operand, try to fold the binary operator into the select arguments. + /// This also works for Cast instructions, which obviously do not have a + /// second operand. + Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); + + /// This is a convenience wrapper function for the above two functions. + Instruction *foldOpWithConstantIntoOperand(Instruction &I); + /// \brief Try to rotate an operation below a PHI node, using PHI nodes for /// its operands. Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ac64671725f3..45a19fb0f1f2 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -267,14 +267,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // Simplify mul instructions with a constant RHS. if (isa<Constant>(Op1)) { - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedMul = foldOpWithConstantIntoOperand(I)) + return FoldedMul; // Canonicalize (X+C1)*CI -> X*CI+C1*CI. { @@ -626,14 +620,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // Simplify mul instructions with a constant RHS. if (isa<Constant>(Op1)) { - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedMul = foldOpWithConstantIntoOperand(I)) + return FoldedMul; // (fmul X, -1.0) --> (fsub -0.0, X) if (match(Op1, m_SpecificFP(-1.0))) { @@ -956,14 +944,9 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - if (*C2 != 0) { // avoid X udiv 0 - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } + if (*C2 != 0) // avoid X udiv 0 + if (Instruction *FoldedDiv = foldOpWithConstantIntoOperand(I)) + return FoldedDiv; } } @@ -1443,6 +1426,16 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } + Value *LHS; + Value *RHS; + + // -x / -y -> x / y + if (match(Op0, m_FNeg(m_Value(LHS))) && match(Op1, m_FNeg(m_Value(RHS)))) { + I.setOperand(0, LHS); + I.setOperand(1, RHS); + return &I; + } + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 184897f751fe..4cbffe9533b7 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -29,7 +29,7 @@ using namespace llvm::PatternMatch; /// locations of the original PHI node arguments. DebugLoc InstCombiner::PHIArgMergedDebugLoc(PHINode &PN) { auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); - DILocation *Loc = FirstInst->getDebugLoc(); + const DILocation *Loc = FirstInst->getDebugLoc(); for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { auto *I = cast<Instruction>(PN.getIncomingValue(i)); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 5ad2a1c0e3e6..4ff9b64ac57c 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -530,13 +530,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, return BinaryOperator::CreateMul(BO->getOperand(0), ConstantExpr::getShl(BOOp, Op1)); - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (Instruction *FoldedShift = foldOpWithConstantIntoOperand(I)) + return FoldedShift; // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 9a52874c4c21..27fc34d23175 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -770,10 +770,6 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, return RI; } -/// Given an instruction with a select as one operand and a constant as the -/// other operand, try to fold the binary operator into the select arguments. -/// This also works for Cast instructions, which obviously do not have a second -/// operand. Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions. if (!SI->hasOneUse()) @@ -824,9 +820,6 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } -/// Given a binary operator, cast instruction, or select which has a PHI node as -/// operand #0, see if we can fold the instruction into the PHI (which is only -/// possible if all operands to the PHI are constants). Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); @@ -964,6 +957,19 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return replaceInstUsesWith(I, NewPN); } +Instruction *InstCombiner::foldOpWithConstantIntoOperand(Instruction &I) { + assert(isa<Constant>(I.getOperand(1)) && "Unexpected operand type"); + + if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) { + if (Instruction *NewSel = FoldOpIntoSelect(I, Sel)) + return NewSel; + } else if (isa<PHINode>(I.getOperand(0))) { + if (Instruction *NewPhi = FoldOpIntoPhi(I)) + return NewPhi; + } + return nullptr; +} + /// Given a pointer type and a constant offset, determine whether or not there /// is a sequence of GEP indices into the pointed type that will land us at the /// specified offset. If so, fill them into NewIndices and return the resultant diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 54bdc9e0772b..9c4b417e35e1 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1598,8 +1598,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, nullptr); unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(GlobalStructTy); - assert((isPowerOf2_32(SizeOfGlobalStruct) || - !TargetTriple.isOSBinFormatCOFF()) && + assert(isPowerOf2_32(SizeOfGlobalStruct) && "global metadata will not be padded appropriately"); SmallVector<Constant *, 16> Initializers(UseMetadataArray ? n : 0); @@ -1766,13 +1765,11 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { GlobalValue::getRealLinkageName(G->getName())); Metadata->setSection(getGlobalMetadataSection()); + // We don't want any padding, but we also need a reasonable alignment. // The MSVC linker always inserts padding when linking incrementally. We // cope with that by aligning each struct to its size, which must be a power // of two. - if (TargetTriple.isOSBinFormatCOFF()) - Metadata->setAlignment(SizeOfGlobalStruct); - else - Metadata->setAlignment(1); // Don't leave padding in between. + Metadata->setAlignment(SizeOfGlobalStruct); // On platforms that support comdats, put the metadata and the // instrumented global in the same group. This ensures that the metadata diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 8da3e31200f3..adea7e772447 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -32,6 +32,11 @@ cl::opt<bool> DoNameCompression("enable-name-compression", cl::desc("Enable name string compression"), cl::init(true)); +cl::opt<bool> DoHashBasedCounterSplit( + "hash-based-counter-split", + cl::desc("Rename counter variable of a comdat function based on cfg hash"), + cl::init(true)); + cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", cl::desc("Do static counter allocation for value profiler"), @@ -272,7 +277,16 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { StringRef NamePrefix = getInstrProfNameVarPrefix(); StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); - return (Prefix + Name).str(); + Function *F = Inc->getParent()->getParent(); + Module *M = F->getParent(); + if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) || + !canRenameComdatFunc(*F)) + return (Prefix + Name).str(); + uint64_t FuncHash = Inc->getHash()->getZExtValue(); + SmallVector<char, 24> HashPostfix; + if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix))) + return (Prefix + Name).str(); + return (Prefix + Name + "." + Twine(FuncHash)).str(); } static inline bool shouldRecordFunctionAddr(Function *F) { diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 28f4f7ea1455..04f9a64bef9f 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -119,7 +119,7 @@ static cl::opt<unsigned> MaxNumAnnotations( // Command line option to control appending FunctionHash to the name of a COMDAT // function. This is to avoid the hash mismatch caused by the preinliner. static cl::opt<bool> DoComdatRenaming( - "do-comdat-renaming", cl::init(true), cl::Hidden, + "do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner")); @@ -134,6 +134,12 @@ static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden); +// Command line option to enable/disable the warning about a hash mismatch in +// the profile data for Comdat functions, which often turns out to be false +// positive due to the pre-instrumentation inline. +static cl::opt<bool> NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", + cl::init(true), cl::Hidden); + // Command line option to enable/disable select instruction instrumentation. static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden); @@ -407,21 +413,9 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { static bool canRenameComdat( Function &F, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { - if (F.getName().empty()) - return false; - if (!needsComdatForCounter(F, *(F.getParent()))) - return false; - // Only safe to do if this function may be discarded if it is not used - // in the compilation unit. - if (!GlobalValue::isDiscardableIfUnused(F.getLinkage())) + if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) return false; - // For AvailableExternallyLinkage functions. - if (!F.hasComdat()) { - assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); - return true; - } - // FIXME: Current only handle those Comdat groups that only containing one // function and function aliases. // (1) For a Comdat group containing multiple functions, we need to have a @@ -803,7 +797,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { } else if (Err == instrprof_error::hash_mismatch || Err == instrprof_error::malformed) { NumOfPGOMismatch++; - SkipWarning = NoPGOWarnMismatch; + SkipWarning = + NoPGOWarnMismatch || + (NoPGOWarnMismatchComdat && + (F.hasComdat() || + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); } if (SkipWarning) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 56df77f03028..06d3d6a73954 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -13,10 +13,12 @@ add_llvm_library(LLVMScalarOpts GuardWidening.cpp GVN.cpp GVNHoist.cpp + IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp + LoopAccessAnalysisPrinter.cpp LoopSink.cpp LoadCombine.cpp LoopDeletion.cpp @@ -26,6 +28,7 @@ add_llvm_library(LLVMScalarOpts LoopInstSimplify.cpp LoopInterchange.cpp LoopLoadElimination.cpp + LoopPassManager.cpp LoopRerollPass.cpp LoopRotation.cpp LoopSimplifyCFG.cpp diff --git a/lib/Transforms/Scalar/IVUsersPrinter.cpp b/lib/Transforms/Scalar/IVUsersPrinter.cpp new file mode 100644 index 000000000000..807593379283 --- /dev/null +++ b/lib/Transforms/Scalar/IVUsersPrinter.cpp @@ -0,0 +1,22 @@ +//===- IVUsersPrinter.cpp - Induction Variable Users Printer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/IVUsersPrinter.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "iv-users" + +PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + AM.getResult<IVUsersAnalysis>(L, AR).print(OS); + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 68faa886060a..1752fb75eb1b 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -25,15 +25,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/IndVarSimplify.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -49,6 +47,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -2096,7 +2096,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit"); } else { // In any other case, convert both IVInit and IVCount to integers before - // comparing. This may result in SCEV expension of pointers, but in practice + // comparing. This may result in SCEV expansion of pointers, but in practice // SCEV will fold the pointer arithmetic away as such: // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). // @@ -2482,23 +2482,13 @@ bool IndVarSimplify::run(Loop *L) { return Changed; } -PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM) { - auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); +PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { Function *F = L.getHeader()->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - - assert((LI && SE && DT) && - "Analyses required for indvarsimplify not available!"); - - // Optional analyses. - auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F); - auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F); - - IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); + IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); if (!IVS.run(&L)) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 6ef9d0561322..4c15c8a32bec 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -41,8 +41,8 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -61,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -84,14 +85,17 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo); + const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, - const LoopSafetyInfo *SafetyInfo); -static bool isSafeToExecuteUnconditionally(const Instruction &Inst, + const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE); +static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI = nullptr); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, @@ -104,7 +108,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, ScalarEvolution *SE, bool DeleteAST); + TargetLibraryInfo *TLI, ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { return LoopToAliasSetMap; @@ -135,12 +140,16 @@ struct LegacyLICMPass : public LoopPass { } auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + // For the old PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). + OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); return LICM.runOnLoop(L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), - SE ? &SE->getSE() : nullptr, false); + SE ? &SE->getSE() : nullptr, &ORE, false); } /// This transformation requires natural loop information & requires that @@ -176,21 +185,20 @@ private: }; } -PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM) { +PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &) { const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); + AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); Function *F = L.getHeader()->getParent(); - auto *AA = FAM.getCachedResult<AAManager>(*F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - assert((AA && LI && DT && TLI && SE) && "Analyses for LICM not available"); + auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); + // FIXME: This should probably be optional rather than required. + if (!ORE) + report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " + "cached at a higher level"); LoopInvariantCodeMotion LICM; - - if (!LICM.runOnLoop(&L, AA, LI, DT, TLI, SE, true)) + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) return PreservedAnalyses::all(); // FIXME: There is no setPreservesCFG in the new PM. When that becomes @@ -217,7 +225,9 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, - ScalarEvolution *SE, bool DeleteAST) { + ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE, + bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -243,10 +253,10 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo); + CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo); + CurAST, &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -279,7 +289,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, for (AliasSet &AS : *CurAST) Promoted |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, - TLI, L, CurAST, &SafetyInfo); + TLI, L, CurAST, &SafetyInfo, ORE); // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined @@ -320,7 +330,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) { + AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -336,7 +347,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, bool Changed = false; const std::vector<DomTreeNode *> &Children = N->getChildren(); for (DomTreeNode *Child : Children) - Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + Changed |= + sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). @@ -363,9 +375,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // operands of the instruction are loop invariant. // if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo); + Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); } } return Changed; @@ -378,7 +390,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) { + AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && @@ -417,16 +430,17 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && isSafeToExecuteUnconditionally( - I, DT, CurLoop, SafetyInfo, + I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) - Changed |= hoist(I, DT, CurLoop, SafetyInfo); + Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); } const std::vector<DomTreeNode *> &Children = N->getChildren(); for (DomTreeNode *Child : Children) - Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + Changed |= + hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); return Changed; } @@ -465,7 +479,8 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, - LoopSafetyInfo *SafetyInfo) { + LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (!LI->isUnordered()) @@ -486,7 +501,17 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); - return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + bool Invalidated = + pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + // Check loop-invariant address because this may also be a sinkable load + // whose address is not necessarily loop-invariant. + if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) + ORE->emit(OptimizationRemarkMissed( + DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) + << "failed to move load with loop-invariant address " + "because the loop may invalidate its value"); + + return !Invalidated; } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { // Don't sink or hoist dbg info; it's legal, but not useful. if (isa<DbgInfoIntrinsic>(I)) @@ -680,8 +705,11 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, /// static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, - const LoopSafetyInfo *SafetyInfo) { + const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) + << "sinking " << ore::NV("Inst", &I)); bool Changed = false; if (isa<LoadInst>(I)) ++NumMovedLoads; @@ -748,10 +776,13 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, /// is safe to hoist, this instruction is called to do the dirty work. /// static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo) { + const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) + << "hosting " << ore::NV("Inst", &I)); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were @@ -786,15 +817,28 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. -static bool isSafeToExecuteUnconditionally(const Instruction &Inst, +static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI) { if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) return true; - return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); + bool GuaranteedToExecute = + isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); + + if (!GuaranteedToExecute) { + auto *LI = dyn_cast<LoadInst>(&Inst); + if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) + ORE->emit(OptimizationRemarkMissed( + DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) + << "failed to hoist load with loop-invariant address " + "because load is conditionally executed"); + } + + return GuaranteedToExecute; } namespace { @@ -882,7 +926,8 @@ bool llvm::promoteLoopAccessesToScalars( AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks, SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) { + Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && @@ -982,14 +1027,14 @@ bool llvm::promoteLoopAccessesToScalars( // If there is an non-load/store instruction in the loop, we can't promote // it. - if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) { + if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { assert(!Load->isVolatile() && "AST broken"); if (!Load->isSimple()) return false; if (!DereferenceableInPH) DereferenceableInPH = isSafeToExecuteUnconditionally( - *Load, DT, CurLoop, SafetyInfo, Preheader->getTerminator()); + *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. @@ -1074,6 +1119,9 @@ bool llvm::promoteLoopAccessesToScalars( // Otherwise, this is safe to promote, lets do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'); + ORE->emit( + OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) + << "Moving accesses to memory location out of the loop"); ++NumPromoted; // Grab a debug location for the inserted loads/stores; given that the diff --git a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp new file mode 100644 index 000000000000..a64c99117d64 --- /dev/null +++ b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp @@ -0,0 +1,25 @@ +//===- LoopAccessAnalysisPrinter.cpp - Loop Access Analysis Printer --------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-accesses" + +PreservedAnalyses +LoopAccessInfoPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &) { + Function &F = *L.getHeader()->getParent(); + auto &LAI = AM.getResult<LoopAccessAnalysis>(L, AR); + OS << "Loop access info in function '" << F.getName() << "':\n"; + OS.indent(2) << L.getHeader()->getName() << ":\n"; + LAI.print(OS, 4); + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 187e6e3073c7..cca75a365024 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -19,9 +19,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -215,15 +215,10 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE, return Changed; } -PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM) { - auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - auto &DT = *FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto &SE = *FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - auto &LI = *FAM.getCachedResult<LoopAnalysis>(*F); - - bool Changed = runImpl(&L, DT, SE, LI); +PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + bool Changed = runImpl(&L, AR.DT, AR.SE, AR.LI); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index b2b2f72aa83d..19716b28ad66 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -31,13 +31,13 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -946,10 +946,18 @@ PreservedAnalyses LoopDistributePass::run(Function &F, auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + // We don't directly need these analyses but they're required for loop + // analyses so provide them below. + auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - return LAM.getResult<LoopAccessAnalysis>(L); + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + return LAM.getResult<LoopAccessAnalysis>(L, AR); }; bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2743574ecca6..5fec51c095d0 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -46,7 +46,6 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -61,6 +60,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -186,24 +186,12 @@ public: }; } // End anonymous namespace. -PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, - LoopAnalysisManager &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - // Use getCachedResult because Loop pass cannot trigger a function analysis. - auto *AA = FAM.getCachedResult<AAManager>(*F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F); - const auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F); +PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - assert((AA && DT && LI && SE && TLI && TTI && DL) && - "Analyses for Loop Idiom Recognition not available"); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL); + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index f6620ad1ade5..69102d10ff60 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -18,7 +18,6 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" @@ -26,6 +25,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -183,20 +183,10 @@ public: }; } -PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, - LoopAnalysisManager &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - // Use getCachedResult because Loop pass cannot trigger a function analysis. - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F); - const auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F); - assert((LI && AC && TLI) && "Analyses for Loop Inst Simplify not available"); - - if (!SimplifyLoopInst(&L, DT, LI, AC, TLI)) +PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + if (!SimplifyLoopInst(&L, &AR.DT, &AR.LI, &AR.AC, &AR.TLI)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); diff --git a/lib/Transforms/Scalar/LoopPassManager.cpp b/lib/Transforms/Scalar/LoopPassManager.cpp new file mode 100644 index 000000000000..028f4bba8b1d --- /dev/null +++ b/lib/Transforms/Scalar/LoopPassManager.cpp @@ -0,0 +1,85 @@ +//===- LoopPassManager.cpp - Loop pass management -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +// Explicit template instantiations and specialization defininitions for core +// template typedefs. +namespace llvm { +template class PassManager<Loop, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + +/// Explicitly specialize the pass manager's run method to handle loop nest +/// structure updates. +template <> +PreservedAnalyses +PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + if (DebugLogging) + dbgs() << "Starting Loop pass manager run.\n"; + + for (auto &Pass : Passes) { + if (DebugLogging) + dbgs() << "Running pass: " << Pass->name() << " on " << L; + + PreservedAnalyses PassPA = Pass->run(L, AM, AR, U); + + // If the loop was deleted, abort the run and return to the outer walk. + if (U.skipCurrentLoop()) { + PA.intersect(std::move(PassPA)); + break; + } + + // Update the analysis manager as each pass runs and potentially + // invalidates analyses. + AM.invalidate(L, PassPA); + + // Finally, we intersect the final preserved analyses to compute the + // aggregate preserved set for this pass manager. + PA.intersect(std::move(PassPA)); + + // FIXME: Historically, the pass managers all called the LLVM context's + // yield function here. We don't have a generic way to acquire the + // context and it isn't yet clear what the right pattern is for yielding + // in the new pass manager so it is currently omitted. + // ...getContext().yield(); + } + + // Invalidation for the current loop should be handled above, and other loop + // analysis results shouldn't be impacted by runs over this loop. Therefore, + // the remaining analysis results in the AnalysisManager are preserved. We + // mark this with a set so that we don't need to inspect each one + // individually. + // FIXME: This isn't correct! This loop and all nested loops' analyses should + // be preserved, but unrolling should invalidate the parent loop's analyses. + PA.preserveSet<AllAnalysesOn<Loop>>(); + + if (DebugLogging) + dbgs() << "Finished Loop pass manager run.\n"; + + return PA; +} +} + +PrintLoopPass::PrintLoopPass() : OS(dbgs()) {} +PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner) + : OS(OS), Banner(Banner) {} + +PreservedAnalyses PrintLoopPass::run(Loop &L, LoopAnalysisManager &, + LoopStandardAnalysisResults &, + LPMUpdater &) { + printLoop(L, OS, Banner); + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 0225cc325700..cc83069d5f52 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -14,13 +14,12 @@ #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -34,6 +33,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -625,20 +625,11 @@ bool LoopRotate::processLoop(Loop *L) { LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication) : EnableHeaderDuplication(EnableHeaderDuplication) {} -PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) { - auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - const auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F); - auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F); - assert((LI && TTI && AC) && "Analyses for loop rotation not available"); - - // Optional analyses. - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); +PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; - LoopRotate LR(Threshold, LI, TTI, AC, DT, SE); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE); bool Changed = LR.processLoop(&L); if (!Changed) diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index d37339fc5fee..16061212ba38 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -18,18 +18,18 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -64,16 +64,10 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { return Changed; } -PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - assert((LI && DT) && "Analyses for LoopSimplifyCFG not available"); - - if (!simplifyLoopCFG(L, *DT, *LI)) +PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + if (!simplifyLoopCFG(L, AR.DT, AR.LI)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp index f64354497771..f3f415275c0e 100644 --- a/lib/Transforms/Scalar/LoopSink.cpp +++ b/lib/Transforms/Scalar/LoopSink.cpp @@ -38,7 +38,6 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -47,6 +46,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -283,6 +283,9 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // sinked. for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) { Instruction *I = &*II++; + // No need to check for instruction's operands are loop invariant. + assert(L.hasLoopInvariantOperands(I) && + "Insts in a loop's preheader should have loop invariant operands!"); if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a61f646042ae..a1561fc0a6c2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -59,16 +59,15 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -80,13 +79,13 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -99,6 +98,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -5052,21 +5052,11 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { return ReduceLoopStrength(L, IU, SE, DT, LI, TTI); } -PreservedAnalyses LoopStrengthReducePass::run(Loop &L, - LoopAnalysisManager &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - auto &IU = AM.getResult<IVUsersAnalysis>(L); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(*F); - auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F); - assert((SE && DT && LI && TTI) && - "Analyses for Loop Strength Reduce not available"); - - if (!ReduceLoopStrength(&L, IU, *SE, *DT, *LI, *TTI)) +PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE, + AR.DT, AR.LI, AR.TTI)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index f66369b30369..c7f91226d222 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/LoopUnrollAnalyzer.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -33,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <climits> @@ -1111,41 +1111,23 @@ Pass *llvm::createSimpleLoopUnrollPass() { return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0); } -PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) { +PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); + AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); Function *F = L.getHeader()->getParent(); - - DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F); - LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(*F); - ScalarEvolution *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F); - auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F); - auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F); auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); - if (!DT) - report_fatal_error( - "LoopUnrollPass: DominatorTreeAnalysis not cached at a higher level"); - if (!LI) - report_fatal_error( - "LoopUnrollPass: LoopAnalysis not cached at a higher level"); - if (!SE) - report_fatal_error( - "LoopUnrollPass: ScalarEvolutionAnalysis not cached at a higher level"); - if (!TTI) - report_fatal_error( - "LoopUnrollPass: TargetIRAnalysis not cached at a higher level"); - if (!AC) - report_fatal_error( - "LoopUnrollPass: AssumptionAnalysis not cached at a higher level"); + // FIXME: This should probably be optional rather than required. if (!ORE) report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); - bool Changed = - tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true, - ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound); + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE, + /*PreserveLCSSA*/ true, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, + ProvidedRuntime, ProvidedUpperBound); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index eef7db08cd46..e1b6741f31b4 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -135,6 +135,10 @@ struct CongruenceClass { // purposes, and for skipping empty classes. bool Dead = false; + // Number of stores in this congruence class. + // This is used so we can detect store equivalence changes properly. + int StoreCount = 0; + explicit CongruenceClass(unsigned ID) : ID(ID) {} CongruenceClass(unsigned ID, Value *Leader, const Expression *E) : ID(ID), RepLeader(Leader), DefiningExpr(E) {} @@ -198,7 +202,7 @@ class NewGVN : public FunctionPass { ExpressionClassMap ExpressionToClass; // Which values have changed as a result of leader changes. - SmallPtrSet<Value *, 8> ChangedValues; + SmallPtrSet<Value *, 8> LeaderChanges; // Reachability info. using BlockEdge = BasicBlockEdge; @@ -317,7 +321,8 @@ private: template <class T> Value *lookupOperandLeader(Value *, const User *, const T &) const; void performCongruenceFinding(Value *, const Expression *); - + void moveValueToNewCongruenceClass(Value *, CongruenceClass *, + CongruenceClass *); // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); void processOutgoingEdges(TerminatorInst *, BasicBlock *); @@ -347,7 +352,8 @@ private: void cleanupTables(); std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned); void updateProcessedCount(Value *V); - void verifyMemoryCongruency(); + void verifyMemoryCongruency() const; + bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; }; char NewGVN::ID = 0; @@ -717,10 +723,10 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, // Utility function to check whether the congruence class has a member other // than the given instruction. bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) { - // Either it has more than one member, in which case it must contain something - // other than us (because it's indexed by value), or if it only has one member + // Either it has more than one store, in which case it must contain something + // other than us (because it's indexed by value), or if it only has one store // right now, that member should not be us. - return CC->Members.size() > 1 || CC->Members.count(I) == 0; + return CC->StoreCount > 1 || CC->Members.count(I) == 0; } const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, @@ -1044,7 +1050,40 @@ void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { for (auto M : CC->Members) { if (auto *I = dyn_cast<Instruction>(M)) TouchedInstructions.set(InstrDFS[I]); - ChangedValues.insert(M); + LeaderChanges.insert(M); + } +} + +// Move a value, currently in OldClass, to be part of NewClass +// Update OldClass for the move (including changing leaders, etc) +void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass, + CongruenceClass *NewClass) { + DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID + << "\n"); + OldClass->Members.erase(V); + NewClass->Members.insert(V); + if (isa<StoreInst>(V)) { + --OldClass->StoreCount; + assert(OldClass->StoreCount >= 0); + ++NewClass->StoreCount; + assert(NewClass->StoreCount > 0); + } + + ValueToClass[V] = NewClass; + // See if we destroyed the class or need to swap leaders. + if (OldClass->Members.empty() && OldClass != InitialClass) { + if (OldClass->DefiningExpr) { + OldClass->Dead = true; + DEBUG(dbgs() << "Erasing expression " << OldClass->DefiningExpr + << " from table\n"); + ExpressionToClass.erase(OldClass->DefiningExpr); + } + } else if (OldClass->RepLeader == V) { + // When the leader changes, the value numbering of + // everything may change due to symbolization changes, so we need to + // reprocess. + OldClass->RepLeader = *(OldClass->Members.begin()); + markLeaderChangeTouched(OldClass); } } @@ -1101,33 +1140,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { assert(!EClass->Dead && "We accidentally looked up a dead class"); } } - bool WasInChanged = ChangedValues.erase(V); - if (VClass != EClass || WasInChanged) { + bool ClassChanged = VClass != EClass; + bool LeaderChanged = LeaderChanges.erase(V); + if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E << "\n"); - if (VClass != EClass) { - DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID - << "\n"); - - VClass->Members.erase(V); - EClass->Members.insert(V); - ValueToClass[V] = EClass; - // See if we destroyed the class or need to swap leaders. - if (VClass->Members.empty() && VClass != InitialClass) { - if (VClass->DefiningExpr) { - VClass->Dead = true; - DEBUG(dbgs() << "Erasing expression " << *E << " from table\n"); - ExpressionToClass.erase(VClass->DefiningExpr); - } - } else if (VClass->RepLeader == V) { - // When the leader changes, the value numbering of - // everything may change due to symbolization changes, so we need to - // reprocess. - VClass->RepLeader = *(VClass->Members.begin()); - markLeaderChangeTouched(VClass); - } - } + if (ClassChanged) + + moveValueToNewCongruenceClass(V, VClass, EClass); + markUsersTouched(V); if (auto *I = dyn_cast<Instruction>(V)) { @@ -1315,9 +1337,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) { // MemoryDef's for stores and all MemoryPhis to be equal. Right now, no // other expression can generate a memory equivalence. If we start // handling memcpy/etc, we can expand this. - if (isa<StoreInst>(&I)) + if (isa<StoreInst>(&I)) { MemoryAccessEquiv.insert( {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()}); + ++InitialClass->StoreCount; + assert(InitialClass->StoreCount > 0); + } } } InitialClass->Members.swap(InitialValues); @@ -1454,9 +1479,40 @@ void NewGVN::valueNumberInstruction(Instruction *I) { } } +// Check if there is a path, using single or equal argument phi nodes, from +// First to Second. +bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, + const MemoryAccess *Second) const { + if (First == Second) + return true; + + if (auto *FirstDef = dyn_cast<MemoryUseOrDef>(First)) { + auto *DefAccess = FirstDef->getDefiningAccess(); + return singleReachablePHIPath(DefAccess, Second); + } else { + auto *MP = cast<MemoryPhi>(First); + auto ReachableOperandPred = [&](const Use &U) { + return ReachableBlocks.count(MP->getIncomingBlock(U)); + }; + auto FilteredPhiArgs = + make_filter_range(MP->operands(), ReachableOperandPred); + SmallVector<const Value *, 32> OperandList; + std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), + std::back_inserter(OperandList)); + bool Okay = OperandList.size() == 1; + if (!Okay) + Okay = std::equal(OperandList.begin(), OperandList.end(), + OperandList.begin()); + if (Okay) + return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second); + return false; + } +} + // Verify the that the memory equivalence table makes sense relative to the -// congruence classes. -void NewGVN::verifyMemoryCongruency() { +// congruence classes. Note that this checking is not perfect, and is currently +// subject to very rare false negatives. It is only useful for testing/debugging. +void NewGVN::verifyMemoryCongruency() const { // Anything equivalent in the memory access table should be in the same // congruence class. @@ -1483,11 +1539,12 @@ void NewGVN::verifyMemoryCongruency() { if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second); if (FirstMUD && SecondMUD) - assert( - ValueToClass.lookup(FirstMUD->getMemoryInst()) == - ValueToClass.lookup(SecondMUD->getMemoryInst()) && - "The instructions for these memory operations should have been in " - "the same congruence class"); + assert((singleReachablePHIPath(FirstMUD, SecondMUD) || + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst())) && + "The instructions for these memory operations should have " + "been in the same congruence class or reachable through" + "a single argument phi"); } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index fa2235e8439a..49ce0262c97b 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -792,6 +792,7 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed, LoopFunc, LoopStart); BranchInst::Create(LoopStart, NewEntry); + DT->setNewRoot(NewEntry); } // Create an extra loop end node diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e551e4b47ac1..f9a602bc268a 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -172,6 +172,36 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, return false; } +/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary +/// and adds a mapping from the original loop to the new loop to NewLoops. +/// Returns nullptr if no new loop was created and a pointer to the +/// original loop OriginalBB was part of otherwise. +const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, + BasicBlock *ClonedBB, LoopInfo *LI, + NewLoopsMap &NewLoops) { + // Figure out which loop New is in. + const Loop *OldLoop = LI->getLoopFor(OriginalBB); + assert(OldLoop && "Should (at least) be in the loop being unrolled!"); + + Loop *&NewLoop = NewLoops[OldLoop]; + if (!NewLoop) { + // Found a new sub-loop. + assert(OriginalBB == OldLoop->getHeader() && + "Header should be first in RPO"); + + Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); + assert(NewLoopParent && + "Expected parent loop before sub-loop in RPO"); + NewLoop = new Loop; + NewLoopParent->addChildLoop(NewLoop); + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return OldLoop; + } else { + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return nullptr; + } +} + /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional @@ -428,28 +458,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { - // Figure out which loop New is in. - const Loop *OldLoop = LI->getLoopFor(*BB); - assert(OldLoop && "Should (at least) be in the loop being unrolled!"); - - Loop *&NewLoop = NewLoops[OldLoop]; - if (!NewLoop) { - // Found a new sub-loop. - assert(*BB == OldLoop->getHeader() && - "Header should be first in RPO"); - - Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); - LoopsToSimplify.insert(NewLoop); + const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); + if (OldLoop) { + LoopsToSimplify.insert(NewLoops[OldLoop]); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } - NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 5758a415f12b..85da3ba899a5 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -301,15 +301,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, LI->addTopLevelLoop(NewLoop); } + NewLoopsMap NewLoops; + NewLoops[L] = NewLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - if (NewLoop) - NewLoop->addBasicBlockToLoop(NewBB, *LI); - else if (ParentLoop) + if (NewLoop) { + addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); + } else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 09e9f1ddc7fe..c8efa9efc7f3 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -869,8 +869,13 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return false; } - assert(TheLoop->getHeader() == Phi->getParent() && - "PHI is an AddRec for a different loop?!"); + if (AR->getLoop() != TheLoop) { + // FIXME: We should treat this as a uniform. Unfortunately, we + // don't currently know how to handled uniform PHIs. + DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); + return false; + } + Value *StartValue = Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 54390e77bb1f..6e30919246c7 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1275,10 +1275,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_mem_parallel_loop_access}; combineMetadata(I1, I2, KnownIDs); - // If the debug loc for I1 and I2 are different, as we are combining them - // into one instruction, we do not want to select debug loc randomly from - // I1 or I2. - if (!isa<CallInst>(I1) && I1->getDebugLoc() != I2->getDebugLoc()) + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + if (!isa<CallInst>(I1)) I1->setDebugLoc( DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); @@ -1577,7 +1576,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // The debug location for the "common" instruction is the merged locations of // all the commoned instructions. We start with the original location of the // "common" instruction and iteratively merge each location in the loop below. - DILocation *Loc = I0->getDebugLoc(); + const DILocation *Loc = I0->getDebugLoc(); // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 11d54bcf4f89..8eaeb1073a76 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1074,6 +1074,24 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); + if (Op2C->isExactlyValue(-0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl)) { + // If -ffast-math: + // pow(x, -0.5) -> 1.0 / sqrt(x) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // Here we cannot lower to an intrinsic because C99 sqrt() and llvm.sqrt + // are not guaranteed to have the same semantics. + Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip"); + } + } + if (Op2C->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, LibFunc::sqrtl) && @@ -1121,6 +1139,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { !V.isInteger()) return nullptr; + // Propagate fast math flags. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; InnerChain[1] = Op1; @@ -1131,7 +1153,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { bool ignored; V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); - // TODO: Should the new instructions propagate the 'fast' flag of the pow()? Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. if (Op2C->isNegative()) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 578c65daf7c0..1b1f86f8efdc 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -80,6 +80,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" @@ -6949,9 +6950,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } - - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK, - Op1VP, Op2VP); + SmallVector<const Value *, 4> Operands(I->operand_values()); + return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, + Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); @@ -7641,7 +7642,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, auto &TTI = AM.getResult<TargetIRAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F); - auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &DB = AM.getResult<DemandedBitsAnalysis>(F); @@ -7650,10 +7651,11 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - return LAM.getResult<LoopAccessAnalysis>(L); + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + return LAM.getResult<LoopAccessAnalysis>(L, AR); }; bool Changed = - runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE); + runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index bcaa8439cffa..1c7cbc7edf9a 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2493,10 +2493,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); - if (LHS == RHS && isa<Instruction>(LHS)) { - assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order"); - } - if (Value *V = alreadyVectorized(E->Scalars)) return V; |