diff options
Diffstat (limited to 'contrib/llvm/lib')
| -rw-r--r-- | contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 1 | ||||
| -rw-r--r-- | contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 21 | ||||
| -rw-r--r-- | contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 34 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86Subtarget.cpp | 3 | ||||
| -rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 6 | ||||
| -rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp | 129 | ||||
| -rw-r--r-- | contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 |
8 files changed, 148 insertions, 85 deletions
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 6387bb36166e..f5ba637e58e2 100644 --- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 4a5d18e2db75..b05ab4b1da85 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -768,13 +768,12 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata( unsigned ID, PlaceholderQueue &Placeholders) { assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size()); assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); -#ifndef NDEBUG // Lookup first if the metadata hasn't already been loaded. if (auto *MD = MetadataList.lookup(ID)) { auto *N = dyn_cast_or_null<MDNode>(MD); - assert(N && N->isTemporary() && "Lazy loading an already loaded metadata"); + if (!N->isTemporary()) + return; } -#endif SmallVector<uint64_t, 64> Record; StringRef Blob; IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]); @@ -827,8 +826,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( auto getMD = [&](unsigned ID) -> Metadata * { if (ID < MDStringRef.size()) return lazyLoadOneMDString(ID); - if (!IsDistinct) + if (!IsDistinct) { + if (auto *MD = MetadataList.lookup(ID)) + return MD; + // If lazy-loading is enabled, we try recursively to load the operand + // instead of creating a temporary. + if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) { + // Create a temporary for the node that is referencing the operand we + // will lazy-load. It is needed before recursing in case there are + // uniquing cycles. + MetadataList.getMetadataFwdRef(NextMetadataNo); + lazyLoadOneMetadata(ID, Placeholders); + return MetadataList.lookup(ID); + } + // Return a temporary. return MetadataList.getMetadataFwdRef(ID); + } if (auto *MD = MetadataList.getMetadataIfResolved(ID)) return MD; return &Placeholders.getPlaceholderOp(ID); diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index a14b86179d6e..104fb199da08 100644 --- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -829,11 +829,22 @@ static std::string writeGeneratedObject(int count, StringRef CacheEntryPath, // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { + // Prepare the resulting object vector + assert(ProducedBinaries.empty() && "The generator should not be reused"); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } + if (CodeGenOnly) { // Perform only parallel codegen and return. ThreadPool Pool; - assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); int count = 0; for (auto &ModuleBuffer : Modules) { Pool.async([&](int count) { @@ -845,7 +856,12 @@ void ThinLTOCodeGenerator::run() { /*IsImporting*/ false); // CodeGen - ProducedBinaries[count] = codegen(*TheModule); + auto OutputBuffer = codegen(*TheModule); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(OutputBuffer); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, "", SavedObjectsDirectoryPath, *OutputBuffer); }, count++); } @@ -866,18 +882,6 @@ void ThinLTOCodeGenerator::run() { WriteIndexToFile(*Index, OS); } - // Prepare the resulting object vector - assert(ProducedBinaries.empty() && "The generator should not be reused"); - if (SavedObjectsDirectoryPath.empty()) - ProducedBinaries.resize(Modules.size()); - else { - sys::fs::create_directories(SavedObjectsDirectoryPath); - bool IsDir; - sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); - if (!IsDir) - report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); - ProducedBinaryFiles.resize(Modules.size()); - } // Prepare the module map. auto ModuleMap = generateModuleMap(Modules); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 787dff99367e..2f13b722eb3b 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29455,19 +29455,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// Combine brcond/cmov/setcc/.. based on comparing the result of -/// atomic_load_add to use EFLAGS produced by the addition -/// directly if possible. For example: -/// -/// (setcc (cmp (atomic_load_add x, -C) C), COND_E) -/// becomes: -/// (setcc (LADD x, -C), COND_E) -/// -/// and +/// Combine: /// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S) -/// becomes: +/// to: /// (brcond/cmov/setcc .., (LADD x, 1), COND_LE) -/// +/// i.e., reusing the EFLAGS produced by the LOCKed instruction. /// Note that this is only legal for some op/cc combinations. static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, SelectionDAG &DAG) { @@ -29482,7 +29474,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, if (!Cmp.hasOneUse()) return SDValue(); - // This applies to variations of the common case: + // This only applies to variations of the common case: // (icmp slt x, 0) -> (icmp sle (add x, 1), 0) // (icmp sge x, 0) -> (icmp sgt (add x, 1), 0) // (icmp sle x, 0) -> (icmp slt (sub x, 1), 0) @@ -29501,9 +29493,8 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, return SDValue(); auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS); - if (!CmpRHSC) + if (!CmpRHSC || CmpRHSC->getZExtValue() != 0) return SDValue(); - APInt Comparand = CmpRHSC->getAPIntValue(); const unsigned Opc = CmpLHS.getOpcode(); @@ -29519,19 +29510,16 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, if (Opc == ISD::ATOMIC_LOAD_SUB) Addend = -Addend; - if (Comparand == -Addend) { - // No change to CC. - } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) { + if (CC == X86::COND_S && Addend == 1) CC = X86::COND_LE; - } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) { + else if (CC == X86::COND_NS && Addend == 1) CC = X86::COND_G; - } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) { + else if (CC == X86::COND_G && Addend == -1) CC = X86::COND_GE; - } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) { + else if (CC == X86::COND_LE && Addend == -1) CC = X86::COND_L; - } else { + else return SDValue(); - } SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG); DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 727ff70c3ff6..586bb7bd7b1a 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; - - assert((!isPMULLDSlow() || hasSSE41()) && - "Feature Slow PMULLD can only be set on a subtarget with SSE4.1"); } void X86Subtarget::initializeEnvironment() { diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a1561fc0a6c2..01728ae680de 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3163,6 +3163,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { // Don't bother if the instruction is in a BB which ends in an EHPad. if (UseBB->getTerminator()->isEHPad()) continue; + // Don't bother rewriting PHIs in catchswitch blocks. + if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator())) + continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. if (SE.isSCEVable(UserInst->getType())) { @@ -4672,7 +4675,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, // is the canonical backedge for this loop, which complicates post-inc // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(BB->getTerminator())) { + !isa<IndirectBrInst>(BB->getTerminator()) && + !isa<CatchSwitchInst>(BB->getTerminator())) { BasicBlock *Parent = PN->getParent(); Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index e1b6741f31b4..6043e04bb8c5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -81,6 +81,10 @@ STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); +STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); +STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); +STATISTIC(NumGVNAvoidedSortedLeaderChanges, + "Number of avoided sorted leader changes"); //===----------------------------------------------------------------------===// // GVN Pass @@ -139,6 +143,10 @@ struct CongruenceClass { // This is used so we can detect store equivalence changes properly. int StoreCount = 0; + // The most dominating leader after our current leader, because the member set + // is not sorted and is expensive to keep sorted all the time. + std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; + explicit CongruenceClass(unsigned ID) : ID(ID) {} CongruenceClass(unsigned ID, Value *Leader, const Expression *E) : ID(ID), RepLeader(Leader), DefiningExpr(E) {} @@ -320,8 +328,8 @@ private: // Templated to allow them to work both on BB's and BB-edges. template <class T> Value *lookupOperandLeader(Value *, const User *, const T &) const; - void performCongruenceFinding(Value *, const Expression *); - void moveValueToNewCongruenceClass(Value *, CongruenceClass *, + void performCongruenceFinding(Instruction *, const Expression *); + void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *, CongruenceClass *); // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); @@ -1056,20 +1064,43 @@ void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { // Move a value, currently in OldClass, to be part of NewClass // Update OldClass for the move (including changing leaders, etc) -void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass, +void NewGVN::moveValueToNewCongruenceClass(Instruction *I, + CongruenceClass *OldClass, CongruenceClass *NewClass) { - DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID + DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID << "\n"); - OldClass->Members.erase(V); - NewClass->Members.insert(V); - if (isa<StoreInst>(V)) { + + if (I == OldClass->NextLeader.first) + OldClass->NextLeader = {nullptr, ~0U}; + + // The new instruction and new class leader may either be siblings in the + // dominator tree, or the new class leader should dominate the new member + // instruction. We simply check that the member instruction does not properly + // dominate the new class leader. + assert( + !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader || + I == NewClass->RepLeader || + !DT->properlyDominates( + I->getParent(), + cast<Instruction>(NewClass->RepLeader)->getParent()) && + "New class for instruction should not be dominated by instruction"); + + if (NewClass->RepLeader != I) { + auto DFSNum = InstrDFS.lookup(I); + if (DFSNum < NewClass->NextLeader.second) + NewClass->NextLeader = {I, DFSNum}; + } + + OldClass->Members.erase(I); + NewClass->Members.insert(I); + if (isa<StoreInst>(I)) { --OldClass->StoreCount; assert(OldClass->StoreCount >= 0); ++NewClass->StoreCount; assert(NewClass->StoreCount > 0); } - ValueToClass[V] = NewClass; + ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. if (OldClass->Members.empty() && OldClass != InitialClass) { if (OldClass->DefiningExpr) { @@ -1078,25 +1109,48 @@ void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass, << " from table\n"); ExpressionToClass.erase(OldClass->DefiningExpr); } - } else if (OldClass->RepLeader == V) { + } else if (OldClass->RepLeader == I) { // When the leader changes, the value numbering of // everything may change due to symbolization changes, so we need to // reprocess. - OldClass->RepLeader = *(OldClass->Members.begin()); + DEBUG(dbgs() << "Leader change!\n"); + ++NumGVNLeaderChanges; + // We don't need to sort members if there is only 1, and we don't care about + // sorting the initial class because everything either gets out of it or is + // unreachable. + if (OldClass->Members.size() == 1 || OldClass == InitialClass) { + OldClass->RepLeader = *(OldClass->Members.begin()); + } else if (OldClass->NextLeader.first) { + ++NumGVNAvoidedSortedLeaderChanges; + OldClass->RepLeader = OldClass->NextLeader.first; + OldClass->NextLeader = {nullptr, ~0U}; + } else { + ++NumGVNSortedLeaderChanges; + // TODO: If this ends up to slow, we can maintain a dual structure for + // member testing/insertion, or keep things mostly sorted, and sort only + // here, or .... + std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U}; + for (const auto X : OldClass->Members) { + auto DFSNum = InstrDFS.lookup(X); + if (DFSNum < MinDFS.second) + MinDFS = {X, DFSNum}; + } + OldClass->RepLeader = MinDFS.first; + } markLeaderChangeTouched(OldClass); } } // Perform congruence finding on a given value numbering expression. -void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { - ValueToExpression[V] = E; +void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { + ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find // INITIAL. - CongruenceClass *VClass = ValueToClass[V]; - assert(VClass && "Should have found a vclass"); + CongruenceClass *IClass = ValueToClass[I]; + assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. - assert(!VClass->Dead && "Found a dead class"); + assert(!IClass->Dead && "Found a dead class"); CongruenceClass *EClass; if (const auto *VE = dyn_cast<VariableExpression>(E)) { @@ -1118,13 +1172,13 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { NewClass->RepLeader = lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); } else { - NewClass->RepLeader = V; + NewClass->RepLeader = I; } assert(!isa<VariableExpression>(E) && "VariableExpression should have been handled already"); EClass = NewClass; - DEBUG(dbgs() << "Created new congruence class for " << *V + DEBUG(dbgs() << "Created new congruence class for " << *I << " using expression " << *E << " at " << NewClass->ID << " and leader " << *(NewClass->RepLeader) << "\n"); DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); @@ -1140,36 +1194,31 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { assert(!EClass->Dead && "We accidentally looked up a dead class"); } } - bool ClassChanged = VClass != EClass; - bool LeaderChanged = LeaderChanges.erase(V); + bool ClassChanged = IClass != EClass; + bool LeaderChanged = LeaderChanges.erase(I); if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E << "\n"); if (ClassChanged) - - moveValueToNewCongruenceClass(V, VClass, EClass); - - - markUsersTouched(V); - if (auto *I = dyn_cast<Instruction>(V)) { - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { - // If this is a MemoryDef, we need to update the equivalence table. If - // we determined the expression is congruent to a different memory - // state, use that different memory state. If we determined it didn't, - // we update that as well. Right now, we only support store - // expressions. - if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && - EClass->Members.size() != 1) { - auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } - markMemoryUsersTouched(MA); + moveValueToNewCongruenceClass(I, IClass, EClass); + markUsersTouched(I); + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { + // If this is a MemoryDef, we need to update the equivalence table. If + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } + markMemoryUsersTouched(MA); } - } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) { + } else if (auto *SI = dyn_cast<StoreInst>(I)) { // There is, sadly, one complicating thing for stores. Stores do not // produce values, only consume them. However, in order to make loads and // stores value number the same, we ignore the value operand of the store. diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1b1f86f8efdc..dac7032fa08f 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() { // is consecutive-like, the pointer operand should remain uniform. else if (hasConsecutiveLikePtrOperand(&I)) ConsecutiveLikePtrs.insert(Ptr); + + // Otherwise, if the memory instruction will be vectorized and its + // pointer operand is non-consecutive-like, the memory instruction should + // be a gather or scatter operation. Its pointer operand will be + // non-uniform. + else + PossibleNonUniformPtrs.insert(Ptr); } // Add to the Worklist all consecutive and consecutive-like pointers that |
