Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--   lib/Transforms/Scalar/CorrelatedValuePropagation.cpp |  2
-rw-r--r--   lib/Transforms/Scalar/EarlyCSE.cpp                   |  5
-rw-r--r--   lib/Transforms/Scalar/GVNSink.cpp                    | 11
-rw-r--r--   lib/Transforms/Scalar/LoopIdiomRecognize.cpp         | 24
-rw-r--r--   lib/Transforms/Scalar/NewGVN.cpp                     |  4
-rw-r--r--   lib/Transforms/Scalar/RewriteStatepointsForGC.cpp    | 71
-rw-r--r--   lib/Transforms/Scalar/SCCP.cpp                       | 11
7 files changed, 82 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 7b625b9b136ec..2a4c9526dfcd9 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -551,7 +551,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
         BBChanged = true;
       }
     }
-  };
+  }
 
   FnChanged |= BBChanged;
 }
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c4f450949e6de..0f92760a874b5 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
@@ -506,7 +507,7 @@ private:
     if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
       // Optimize MemoryPhi nodes that may become redundant by having all the
       // same input values once MA is removed.
-      SmallVector<MemoryPhi *, 4> PhisToCheck;
+      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
       SmallVector<MemoryAccess *, 8> WorkQueue;
       WorkQueue.push_back(MA);
       // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
@@ -517,7 +518,7 @@ private:
 
       for (auto *U : WI->users())
         if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
-          PhisToCheck.push_back(MP);
+          PhisToCheck.insert(MP);
 
       MSSAUpdater->removeMemoryAccess(WI);
 
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 8634816e702fb..5fd2dfc118b4b 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -64,6 +64,17 @@ using namespace llvm;
 
 STATISTIC(NumRemoved, "Number of instructions removed");
 
+namespace llvm {
+namespace GVNExpression {
+
+LLVM_DUMP_METHOD void Expression::dump() const {
+  print(dbgs());
+  dbgs() << "\n";
+}
+
+}
+}
+
 namespace {
 
 static bool isMemoryInst(const Instruction *I) {
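The EarlyCSE hunk above swaps SmallVector for SmallSetVector so that a MemoryPhi reached through several users is queued for the redundancy check only once. A minimal standalone sketch of that container behavior (int* stands in for MemoryPhi* purely for illustration):

    #include "llvm/ADT/SetVector.h"
    #include <cassert>

    int main() {
      // SmallSetVector keeps insertion order but silently drops duplicates,
      // unlike SmallVector::push_back, which would enqueue the same node twice.
      llvm::SmallSetVector<int *, 4> PhisToCheck;
      int A = 0, B = 0;
      PhisToCheck.insert(&A);
      PhisToCheck.insert(&B);
      PhisToCheck.insert(&A); // duplicate insert is a no-op
      assert(PhisToCheck.size() == 2);
      return 0;
    }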
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b706152f30c81..8b435050ac769 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -983,21 +983,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   const SCEV *NumBytesS =
       SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+  if (StoreSize != 1)
+    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+                               SCEV::FlagNUW);
+
+  Value *NumBytes =
+      Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
   unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
   CallInst *NewCall = nullptr;
   // Check whether to generate an unordered atomic memcpy:
   //  If the load or store are atomic, then they must neccessarily be unordered
   //  by previous checks.
-  if (!SI->isAtomic() && !LI->isAtomic()) {
-    if (StoreSize != 1)
-      NumBytesS = SE->getMulExpr(
-          NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
-
-    Value *NumBytes =
-        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
-
+  if (!SI->isAtomic() && !LI->isAtomic())
     NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
-  } else {
+  else {
     // We cannot allow unaligned ops for unordered load/store, so reject
     // anything where the alignment isn't at least the element size.
     if (Align < StoreSize)
@@ -1010,11 +1010,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
     if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
       return false;
 
-    Value *NumElements =
-        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+        StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
 
-    NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
-                                                NumElements, StoreSize);
     // Propagate alignment info onto the pointer args. Note that unordered
     // atomic loads/stores are *required* by the spec to have an alignment
     // but non-atomic loads/stores may not.
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 6926aae37963f..cbbd55512c9f5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2195,7 +2195,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
 // For a given expression, mark the phi of ops instructions that could have
 // changed as a result.
 void NewGVN::markPhiOfOpsChanged(const Expression *E) {
-  touchAndErase(ExpressionToPhiOfOps, E);
+  touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E));
 }
 
 // Perform congruence finding on a given value numbering expression.
@@ -3561,7 +3561,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
     // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
     // the block and subtract the pred count, but it's more complicated.
     if (ReachablePredCount.lookup(BB) !=
-        std::distance(pred_begin(BB), pred_end(BB))) {
+        unsigned(std::distance(pred_begin(BB), pred_end(BB)))) {
       for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
         auto &PHI = cast<PHINode>(*II);
         ReplaceUnreachablePHIArgs(PHI, BB);
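The LoopIdiomRecognize change above hoists the byte-count computation out of the branch so that both the plain memcpy and the element-wise unordered atomic memcpy are sized in bytes: NumBytes = (backedge-taken count + 1) * StoreSize. A plain-integer sketch of that arithmetic (the concrete values are illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A loop whose backedge is taken BECount times runs BECount + 1
      // iterations, and each iteration moves StoreSize bytes.
      uint64_t BECount = 99;  // illustrative backedge-taken count
      uint64_t StoreSize = 4; // illustrative store width in bytes
      uint64_t NumBytes = (BECount + 1) * StoreSize;
      assert(NumBytes == 400);
      return 0;
    }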
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bae7911d222c9..a52739bb76f71 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -89,10 +89,10 @@ struct RewriteStatepointsForGC : public ModulePass {
         Changed |= runOnFunction(F);
 
     if (Changed) {
-      // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
+      // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn
       // returns true for at least one function in the module.  Since at least
       // one function changed, we know that the precondition is satisfied.
-      stripNonValidAttributes(M);
+      stripNonValidAttributesAndMetadata(M);
     }
 
     return Changed;
@@ -105,20 +105,24 @@ struct RewriteStatepointsForGC : public ModulePass {
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 
-  /// The IR fed into RewriteStatepointsForGC may have had attributes implying
-  /// dereferenceability that are no longer valid/correct after
-  /// RewriteStatepointsForGC has run.  This is because semantically, after
+  /// The IR fed into RewriteStatepointsForGC may have had attributes and
+  /// metadata implying dereferenceability that are no longer valid/correct after
+  /// RewriteStatepointsForGC has run. This is because semantically, after
   /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
-  /// heap. stripNonValidAttributes (conservatively) restores correctness
-  /// by erasing all attributes in the module that externally imply
-  /// dereferenceability.
-  /// Similar reasoning also applies to the noalias attributes. gc.statepoint
-  /// can touch the entire heap including noalias objects.
-  void stripNonValidAttributes(Module &M);
-
-  // Helpers for stripNonValidAttributes
-  void stripNonValidAttributesFromBody(Function &F);
+  /// heap. stripNonValidAttributesAndMetadata (conservatively) restores
+  /// correctness by erasing all attributes in the module that externally imply
+  /// dereferenceability. Similar reasoning also applies to the noalias
+  /// attributes and metadata. gc.statepoint can touch the entire heap including
+  /// noalias objects.
+  void stripNonValidAttributesAndMetadata(Module &M);
+
+  // Helpers for stripNonValidAttributesAndMetadata
+  void stripNonValidAttributesAndMetadataFromBody(Function &F);
   void stripNonValidAttributesFromPrototype(Function &F);
+
+  // Certain metadata on instructions are invalid after running RS4GC.
+  // Optimizations that run after RS4GC can incorrectly use this metadata to
+  // optimize functions. We drop such metadata on the instruction.
+  void stripInvalidMetadataFromInstruction(Instruction &I);
 };
 
 } // namespace
@@ -2306,13 +2310,44 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
   RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
 }
 
-void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
+void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) {
+
+  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+    return;
+  // These are the attributes that are still valid on loads and stores after
+  // RS4GC.
+  // The metadata implying dereferenceability and noalias are (conservatively)
+  // dropped. This is because semantically, after RewriteStatepointsForGC runs,
+  // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
+  // touch the entire heap including noalias objects. Note: The reasoning is
+  // same as stripping the dereferenceability and noalias attributes that are
+  // analogous to the metadata counterparts.
+  // We also drop the invariant.load metadata on the load because that metadata
+  // implies the address operand to the load points to memory that is never
+  // changed once it became dereferenceable. This is no longer true after RS4GC.
+  // Similar reasoning applies to invariant.group metadata, which applies to
+  // loads within a group.
+  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
+                                        LLVMContext::MD_range,
+                                        LLVMContext::MD_alias_scope,
+                                        LLVMContext::MD_nontemporal,
+                                        LLVMContext::MD_nonnull,
+                                        LLVMContext::MD_align,
+                                        LLVMContext::MD_type};
+
+  // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
+  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
+
+}
+
+void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) {
   if (F.empty())
     return;
 
   LLVMContext &Ctx = F.getContext();
   MDBuilder Builder(Ctx);
 
+
   for (Instruction &I : instructions(F)) {
     if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
       assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
@@ -2333,6 +2368,8 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
       I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
     }
 
+    stripInvalidMetadataFromInstruction(I);
+
     if (CallSite CS = CallSite(&I)) {
       for (int i = 0, e = CS.arg_size(); i != e; i++)
         if (isa<PointerType>(CS.getArgument(i)->getType()))
@@ -2357,7 +2394,7 @@ static bool shouldRewriteStatepointsIn(Function &F) {
   return false;
 }
 
-void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
+void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) {
 #ifndef NDEBUG
   assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!");
 #endif
@@ -2366,7 +2403,7 @@
   for (Function &F : M)
     stripNonValidAttributesFromPrototype(F);
 
   for (Function &F : M)
-    stripNonValidAttributesFromBody(F);
+    stripNonValidAttributesAndMetadataFromBody(F);
 }
 
 bool RewriteStatepointsForGC::runOnFunction(Function &F) {
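The whitelist in stripInvalidMetadataFromInstruction above relies on Instruction::dropUnknownNonDebugMetadata, which removes every non-debug metadata kind not passed in. A minimal sketch of the same pattern (the helper name and the two kept kinds are illustrative, not from the patch):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"

    // Keep only !tbaa and !range; every other non-debug kind (e.g.
    // !invariant.load, !noalias) is stripped, mirroring how RS4GC drops
    // metadata that may become false once gc.statepoint can "free" the heap.
    static void keepOnlyValidKinds(llvm::Instruction &I) {
      unsigned Kinds[] = {llvm::LLVMContext::MD_tbaa,
                          llvm::LLVMContext::MD_range};
      I.dropUnknownNonDebugMetadata(Kinds);
    }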
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 815492ac354c0..c6929c33b3e9e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -515,10 +515,6 @@ private:
   void visitCmpInst(CmpInst &I);
   void visitExtractValueInst(ExtractValueInst &EVI);
   void visitInsertValueInst(InsertValueInst &IVI);
-  void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); }
-  void visitFuncletPadInst(FuncletPadInst &FPI) {
-    markOverdefined(&FPI);
-  }
   void visitCatchSwitchInst(CatchSwitchInst &CPI) {
     markOverdefined(&CPI);
     visitTerminatorInst(CPI);
@@ -539,13 +535,6 @@ private:
   void visitResumeInst    (TerminatorInst &I) { /*returns void*/ }
   void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
   void visitFenceInst     (FenceInst &I) { /*returns void*/ }
-  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
-    markOverdefined(&I);
-  }
-  void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
-  void visitAllocaInst    (Instruction &I) { markOverdefined(&I); }
-  void visitVAArgInst     (Instruction &I) { markOverdefined(&I); }
-
   void visitInstruction(Instruction &I) {
     // If a new instruction is added to LLVM that we don't handle.
     DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
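The SCCP deletions above are possible because InstVisitor falls back to visitInstruction for any opcode without a dedicated override, and SCCP's catch-all already handles unknown instructions conservatively. A reduced sketch of that dispatch pattern (the visitor name and body are illustrative, not SCCP's actual solver):

    #include "llvm/IR/InstVisitor.h"

    struct MarkOverdefinedVisitor
        : public llvm::InstVisitor<MarkOverdefinedVisitor> {
      // Catch-all: landingpad, cmpxchg, atomicrmw, alloca, va_arg and any
      // future instruction land here unless a specific visit method is
      // declared, so per-opcode overrides doing the same thing are redundant.
      void visitInstruction(llvm::Instruction &I) {
        // markOverdefined(&I); // stand-in for SCCPSolver's lattice update
      }
    };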