diff options
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/IPO/LowerTypeTests.cpp | 109 | ||||
-rw-r--r-- | lib/Transforms/IPO/PassManagerBuilder.cpp | 3 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/AddressSanitizer.cpp | 1 | ||||
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopLoadElimination.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopUnswitch.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 192 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp | 18 | ||||
-rw-r--r-- | lib/Transforms/Utils/FunctionImportUtils.cpp | 15 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 12 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 34 |
12 files changed, 244 insertions, 158 deletions
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index f4742aaf748f1..82daf754be0dd 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -42,6 +42,8 @@ using namespace llvm; using namespace lowertypetests; +using SummaryAction = LowerTypeTestsSummaryAction; + #define DEBUG_TYPE "lowertypetests" STATISTIC(ByteArraySizeBits, "Byte array size in bits"); @@ -55,9 +57,15 @@ static cl::opt<bool> AvoidReuse( cl::desc("Try to avoid reuse of byte array addresses using aliases"), cl::Hidden, cl::init(true)); -static cl::opt<std::string> ClSummaryAction( +static cl::opt<SummaryAction> ClSummaryAction( "lowertypetests-summary-action", - cl::desc("What to do with the summary when running this pass"), cl::Hidden); + cl::desc("What to do with the summary when running this pass"), + cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"), + clEnumValN(SummaryAction::Import, "import", + "Import typeid resolutions from summary and globals"), + clEnumValN(SummaryAction::Export, "export", + "Export typeid resolutions to summary and globals")), + cl::Hidden); static cl::opt<std::string> ClReadSummary( "lowertypetests-read-summary", @@ -226,8 +234,8 @@ public: class LowerTypeTestsModule { Module &M; - // This is for testing purposes only. - std::unique_ptr<ModuleSummaryIndex> OwnedSummary; + SummaryAction Action; + ModuleSummaryIndex *Summary; bool LinkerSubsectionsViaSymbols; Triple::ArchType Arch; @@ -319,21 +327,38 @@ class LowerTypeTestsModule { void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions); public: - LowerTypeTestsModule(Module &M); - ~LowerTypeTestsModule(); + LowerTypeTestsModule(Module &M, SummaryAction Action, + ModuleSummaryIndex *Summary); bool lower(); + + // Lower the module using the action and summary passed as command line + // arguments. For testing purposes only. + static bool runForTesting(Module &M); }; struct LowerTypeTests : public ModulePass { static char ID; - LowerTypeTests() : ModulePass(ID) { + + bool UseCommandLine = false; + + SummaryAction Action; + ModuleSummaryIndex *Summary; + + LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { + initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); + } + + LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary) + : ModulePass(ID), Action(Action), Summary(Summary) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return LowerTypeTestsModule(M).lower(); + if (UseCommandLine) + return LowerTypeTestsModule::runForTesting(M); + return LowerTypeTestsModule(M, Action, Summary).lower(); } }; @@ -343,7 +368,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, false) char LowerTypeTests::ID = 0; -ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; } +ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action, + ModuleSummaryIndex *Summary) { + return new LowerTypeTests(Action, Summary); +} /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. @@ -1145,22 +1173,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( } /// Lower all type tests in this module. -LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) { - // Handle the command-line summary arguments. This code is for testing - // purposes only, so we handle errors directly. - if (!ClSummaryAction.empty()) { - OwnedSummary = make_unique<ModuleSummaryIndex>(); - if (!ClReadSummary.empty()) { - ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + - ": "); - auto ReadSummaryFile = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); - - yaml::Input In(ReadSummaryFile->getBuffer()); - In >> *OwnedSummary; - ExitOnErr(errorCodeToError(In.error())); - } - } +LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action, + ModuleSummaryIndex *Summary) + : M(M), Action(Action), Summary(Summary) { + // FIXME: Use these fields. + (void)this->Action; + (void)this->Summary; Triple TargetTriple(M.getTargetTriple()); LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); @@ -1169,18 +1187,36 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) { ObjectFormat = TargetTriple.getObjectFormat(); } -LowerTypeTestsModule::~LowerTypeTestsModule() { - if (ClSummaryAction.empty() || ClWriteSummary.empty()) - return; +bool LowerTypeTestsModule::runForTesting(Module &M) { + ModuleSummaryIndex Summary; - ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + - ": "); - std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); - ExitOnErr(errorCodeToError(EC)); + // Handle the command-line summary arguments. This code is for testing + // purposes only, so we handle errors directly. + if (!ClReadSummary.empty()) { + ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + + ": "); + auto ReadSummaryFile = + ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); + + yaml::Input In(ReadSummaryFile->getBuffer()); + In >> Summary; + ExitOnErr(errorCodeToError(In.error())); + } + + bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower(); + + if (!ClWriteSummary.empty()) { + ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + + ": "); + std::error_code EC; + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + ExitOnErr(errorCodeToError(EC)); + + yaml::Output Out(OS); + Out << Summary; + } - yaml::Output Out(OS); - Out << *OwnedSummary; + return Changed; } bool LowerTypeTestsModule::lower() { @@ -1313,7 +1349,8 @@ bool LowerTypeTestsModule::lower() { PreservedAnalyses LowerTypeTestsPass::run(Module &M, ModuleAnalysisManager &AM) { - bool Changed = LowerTypeTestsModule(M).lower(); + bool Changed = + LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower(); if (!Changed) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 293ddf21a68f7..d086ee05a64fe 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -857,7 +857,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { // Lower type metadata and the type.test intrinsic. This pass supports Clang's // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. - PM.add(createLowerTypeTestsPass()); + PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None, + /*Summary=*/nullptr)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 012bfc7b4944c..013159cde7740 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1903,7 +1903,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, return foldICmpShlOne(Cmp, Shl, C); // Check that the shift amount is in range. If not, don't perform undefined - // shifts. When the shift is visited it will be simplified. + // shifts. When the shift is visited, it will be simplified. unsigned TypeBits = C->getBitWidth(); if (ShiftAmt->uge(TypeBits)) return nullptr; @@ -1923,7 +1923,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, return new ICmpInst(Pred, X, LShrC); if (Shl->hasOneUse()) { - // Otherwise strength reduce the shift into an and. + // Otherwise, strength reduce the shift into an and. Constant *Mask = ConstantInt::get(Shl->getType(), APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); @@ -1951,7 +1951,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, } // When the shift is nuw and pred is >u or <=u, comparison only really happens - // in the pre-shifted bits. Since InstSimplify canoncalizes <=u into <u, the + // in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u, the // <=u case can be further converted to match <u (see below). if (Shl->hasNoUnsignedWrap() && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) { @@ -1970,9 +1970,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, // Transform (icmp pred iM (shl iM %v, N), C) // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)) // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N. - // This enables us to get rid of the shift in favor of a trunc which can be + // This enables us to get rid of the shift in favor of a trunc that may be // free on the target. It has the additional benefit of comparing to a - // smaller constant, which will be target friendly. + // smaller constant that may be more target-friendly. unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1); if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt && DL.isLegalInteger(TypeBits - Amt)) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1d55283987766..54bdc9e0772b5 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1818,6 +1818,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { RegisteredFlag = new GlobalVariable( M, IntptrTy, false, GlobalVariable::CommonLinkage, ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); // Update llvm.compiler.used, adding the new liveness globals. This is // needed so that during LTO these variables stay alive. The alternative diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6aeb5237ffe35..68faa886060a1 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1423,7 +1423,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { if (widenLoopCompare(DU)) return nullptr; - // This user does not evaluate to a recurence after widening, so don't + // This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT, LI); diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 08e7acdaaf72c..8fb580183e307 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -415,7 +415,9 @@ public: Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(), PH->getTerminator()); Value *Initial = - new LoadInst(InitialPtr, "load_initial", PH->getTerminator()); + new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false, + Cand.Load->getAlignment(), PH->getTerminator()); + PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); PHI->addIncoming(Initial, PH); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 6f7682c96cefb..76fe91884c7b8 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1382,8 +1382,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), Succ->begin(), Succ->end()); LPM->deleteSimpleAnalysisValue(BI, L); - BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); + BI->eraseFromParent(); // Remove Succ from the loop tree. LI->removeBlock(Succ); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 8b8236390bf47..eef7db08cd460 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -79,7 +79,8 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); -STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); +STATISTIC(NumGVNMaxIterations, + "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -327,7 +328,7 @@ private: // Elimination. struct ValueDFS; void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, - std::vector<ValueDFS> &); + SmallVectorImpl<ValueDFS> &); bool eliminateInstructions(Function &); void replaceInstruction(Instruction *, Value *); @@ -336,8 +337,11 @@ private: // New instruction creation. void handleNewInstruction(Instruction *){}; + + // Various instruction touch utilities void markUsersTouched(Value *); void markMemoryUsersTouched(MemoryAccess *); + void markLeaderChangeTouched(CongruenceClass *CC); // Utilities. void cleanupTables(); @@ -390,10 +394,10 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) PHIExpression *NewGVN::createPHIExpression(Instruction *I) { - BasicBlock *PhiBlock = I->getParent(); + BasicBlock *PHIBlock = I->getParent(); auto *PN = cast<PHINode>(I); - auto *E = new (ExpressionAllocator) - PHIExpression(PN->getNumOperands(), I->getParent()); + auto *E = + new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); E->allocateOperands(ArgRecycler, ExpressionAllocator); E->setType(I->getType()); @@ -408,10 +412,10 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) { std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { - // Don't try to transform self-defined phis + // Don't try to transform self-defined phis. if (U == PN) return PN; - const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock); + const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock); return lookupOperandLeader(U, I, BBE); }); return E; @@ -710,6 +714,15 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } +// Utility function to check whether the congruence class has a member other +// than the given instruction. +bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) { + // Either it has more than one member, in which case it must contain something + // other than us (because it's indexed by value), or if it only has one member + // right now, that member should not be us. + return CC->Members.size() > 1 || CC->Members.count(I) == 0; +} + const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, const BasicBlock *B) { // Unlike loads, we never try to eliminate stores, so we do not check if they @@ -725,8 +738,12 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, cast<MemoryDef>(StoreAccess)->getDefiningAccess()); const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); + // Basically, check if the congruence class the store is in is defined by a + // store that isn't us, and has the same value. MemorySSA takes care of + // ensuring the store has the same memory state as us already. if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) && - CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B)) + CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) && + hasMemberOtherThanUs(CC, I)) return createStoreExpression(SI, StoreRHS, B); } @@ -810,36 +827,50 @@ bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) { const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, const BasicBlock *B) { auto *E = cast<PHIExpression>(createPHIExpression(I)); - if (E->op_empty()) { + // We match the semantics of SimplifyPhiNode from InstructionSimplify here. + + // See if all arguaments are the same. + // We track if any were undef because they need special handling. + bool HasUndef = false; + auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { + if (Arg == I) + return false; + if (isa<UndefValue>(Arg)) { + HasUndef = true; + return false; + } + return true; + }); + // If we are left with no operands, it's undef + if (Filtered.begin() == Filtered.end()) { DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" << "\n"); E->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); return createConstantExpression(UndefValue::get(I->getType())); } - - Value *AllSameValue = E->getOperand(0); - - // See if all arguments are the same, ignoring undef arguments, because we can - // choose a value that is the same for them. - for (const Value *Arg : E->operands()) - if (Arg != AllSameValue && !isa<UndefValue>(Arg)) { - AllSameValue = nullptr; - break; + Value *AllSameValue = *(Filtered.begin()); + ++Filtered.begin(); + // Can't use std::equal here, sadly, because filter.begin moves. + if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { + return V == AllSameValue; + })) { + // In LLVM's non-standard representation of phi nodes, it's possible to have + // phi nodes with cycles (IE dependent on other phis that are .... dependent + // on the original phi node), especially in weird CFG's where some arguments + // are unreachable, or uninitialized along certain paths. This can cause + // infinite loops during evaluation. We work around this by not trying to + // really evaluate them independently, but instead using a variable + // expression to say if one is equivalent to the other. + // We also special case undef, so that if we have an undef, we can't use the + // common value unless it dominates the phi block. + if (HasUndef) { + // Only have to check for instructions + if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue)) + if (!DT->dominates(AllSameInst, I)) + return E; } - if (AllSameValue) { - // It's possible to have phi nodes with cycles (IE dependent on - // other phis that are .... dependent on the original phi node), - // especially in weird CFG's where some arguments are unreachable, or - // uninitialized along certain paths. - // This can cause infinite loops during evaluation (even if you disable - // the recursion below, you will simply ping-pong between congruence - // classes). If a phi node symbolically evaluates to another phi node, - // just leave it alone. If they are really the same, we will still - // eliminate them in favor of each other. - if (isa<PHINode>(AllSameValue)) - return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -1007,12 +1038,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { } } +// Touch the instructions that need to be updated after a congruence class has a +// leader change, and mark changed values. +void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { + for (auto M : CC->Members) { + if (auto *I = dyn_cast<Instruction>(M)) + TouchedInstructions.set(InstrDFS[I]); + ChangedValues.insert(M); + } +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { - ValueToExpression[V] = E; // This is guaranteed to return something, since it will at least find // INITIAL. + CongruenceClass *VClass = ValueToClass[V]; assert(VClass && "Should have found a vclass"); // Dead classes should have been eliminated from the mapping. @@ -1031,14 +1072,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { place->second = NewClass; // Constants and variables should always be made the leader. - if (const auto *CE = dyn_cast<ConstantExpression>(E)) + if (const auto *CE = dyn_cast<ConstantExpression>(E)) { NewClass->RepLeader = CE->getConstantValue(); - else if (const auto *VE = dyn_cast<VariableExpression>(E)) - NewClass->RepLeader = VE->getVariableValue(); - else if (const auto *SE = dyn_cast<StoreExpression>(E)) - NewClass->RepLeader = SE->getStoreInst()->getValueOperand(); - else + } else if (const auto *SE = dyn_cast<StoreExpression>(E)) { + StoreInst *SI = SE->getStoreInst(); + NewClass->RepLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + } else { NewClass->RepLeader = V; + } + assert(!isa<VariableExpression>(E) && + "VariableExpression should have been handled already"); EClass = NewClass; DEBUG(dbgs() << "Created new congruence class for " << *V @@ -1077,14 +1121,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { ExpressionToClass.erase(VClass->DefiningExpr); } } else if (VClass->RepLeader == V) { - // FIXME: When the leader changes, the value numbering of - // everything may change, so we need to reprocess. + // When the leader changes, the value numbering of + // everything may change due to symbolization changes, so we need to + // reprocess. VClass->RepLeader = *(VClass->Members.begin()); - for (auto M : VClass->Members) { - if (auto *I = dyn_cast<Instruction>(M)) - TouchedInstructions.set(InstrDFS[I]); - ChangedValues.insert(M); - } + markLeaderChangeTouched(VClass); } } @@ -1106,6 +1147,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { markMemoryUsersTouched(MA); } } + } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) { + // There is, sadly, one complicating thing for stores. Stores do not + // produce values, only consume them. However, in order to make loads and + // stores value number the same, we ignore the value operand of the store. + // But the value operand will still be the leader of our class, and thus, it + // may change. Because the store is a use, the store will get reprocessed, + // but nothing will change about it, and so nothing above will catch it + // (since the class will not change). In order to make sure everything ends + // up okay, we need to recheck the leader of the class. Since stores of + // different values value number differently due to different memorydefs, we + // are guaranteed the leader is always the same between stores in the same + // class. + DEBUG(dbgs() << "Checking store leader\n"); + auto ProperLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + if (EClass->RepLeader != ProperLeader) { + DEBUG(dbgs() << "Store leader changed, fixing\n"); + EClass->RepLeader = ProperLeader; + markLeaderChangeTouched(EClass); + markMemoryUsersTouched(MSSA->getMemoryAccess(SI)); + } } } @@ -1708,8 +1770,9 @@ struct NewGVN::ValueDFS { } }; -void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense, - std::vector<ValueDFS> &DFSOrderedSet) { +void NewGVN::convertDenseToDFSOrdered( + CongruenceClass::MemberSet &Dense, + SmallVectorImpl<ValueDFS> &DFSOrderedSet) { for (auto D : Dense) { // First add the value. BasicBlock *BB = getBlockForValue(D); @@ -1972,21 +2035,25 @@ bool NewGVN::eliminateInstructions(Function &F) { ValueDFSStack EliminationStack; // Convert the members to DFS ordered sets and then merge them. - std::vector<ValueDFS> DFSOrderedSet; + SmallVector<ValueDFS, 8> DFSOrderedSet; convertDenseToDFSOrdered(CC->Members, DFSOrderedSet); // Sort the whole thing. - sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); - - for (auto &C : DFSOrderedSet) { - int MemberDFSIn = C.DFSIn; - int MemberDFSOut = C.DFSOut; - Value *Member = C.Val; - Use *MemberUse = C.U; - - // We ignore void things because we can't get a value from them. - if (Member && Member->getType()->isVoidTy()) - continue; + std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); + + for (auto &VD : DFSOrderedSet) { + int MemberDFSIn = VD.DFSIn; + int MemberDFSOut = VD.DFSOut; + Value *Member = VD.Val; + Use *MemberUse = VD.U; + + if (Member) { + // We ignore void things because we can't get a value from them. + // FIXME: We could actually use this to kill dead stores that are + // dominated by equivalent earlier stores. + if (Member->getType()->isVoidTy()) + continue; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -1995,8 +2062,6 @@ bool NewGVN::eliminateInstructions(Function &F) { << EliminationStack.dfs_back().first << "," << EliminationStack.dfs_back().second << ")\n"); } - if (Member && isa<Constant>(Member)) - assert(isa<Constant>(CC->RepLeader)); DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," << MemberDFSOut << ")\n"); @@ -2037,11 +2102,8 @@ bool NewGVN::eliminateInstructions(Function &F) { continue; Value *Result = EliminationStack.back(); - // Don't replace our existing users with ourselves, and don't replace - // phi node arguments with the result of the same phi node. - // IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef) - if (MemberUse->get() == Result || - (isa<PHINode>(Result) && MemberUse->getUser() == Result)) + // Don't replace our existing users with ourselves. + if (MemberUse->get() == Result) continue; DEBUG(dbgs() << "Found replacement " << *Result << " for " diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 8a6be97d08c7c..34be90692481b 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -511,9 +511,6 @@ private: void visitSelectInst(SelectInst &I); void visitBinaryOperator(Instruction &I); void visitCmpInst(CmpInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitShuffleVectorInst(ShuffleVectorInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); } @@ -970,21 +967,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markOverdefined(&I); } -void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - -void SCCPSolver::visitInsertElementInst(InsertElementInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - -void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. // diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index 678d02e05d423..9844190ef84a2 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -67,12 +67,15 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( return true; } - // When exporting, consult the index. - auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID()); - assert(Summaries != ImportIndex.end() && - "Missing summary for global value when exporting"); - assert(Summaries->second.size() == 1 && "Local has more than one summary"); - auto Linkage = Summaries->second.front()->linkage(); + // When exporting, consult the index. We can have more than one local + // with the same GUID, in the case of same-named locals in different but + // same-named source files that were compiled in their respective directories + // (so the source file name and resulting GUID is the same). Find the one + // in this module. + auto Summary = ImportIndex.findSummaryInModule( + SGV->getGUID(), SGV->getParent()->getModuleIdentifier()); + assert(Summary && "Missing summary for global value when exporting"); + auto Linkage = Summary->linkage(); if (!GlobalValue::isLocalLinkage(Linkage)) { assert(!isNonRenamableLocal(*SGV) && "Attempting to promote non-renamable local"); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index c8f030f7eb835..11d54bcf4f89d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1189,19 +1189,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; StringRef Name = Callee->getName(); if (Name == "fabs" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, false); + return optimizeUnaryDoubleFP(CI, B, false); - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast<Instruction>(Op)) { - // Fold fabs(x * x) -> x * x; any squared FP value must already be positive. - if (I->getOpcode() == Instruction::FMul) - if (I->getOperand(0) == I->getOperand(1)) - return Op; - } - return Ret; + return nullptr; } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 31daba2248aae..578c65daf7c0f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -783,6 +783,10 @@ protected: // Similarly, we create a new latch condition when setting up the structure // of the new loop, so the old one can become dead. SmallPtrSet<Instruction *, 4> DeadInstructions; + + // Holds the end values for each induction variable. We save the end values + // so we can later fix-up the external users of the induction variables. + DenseMap<PHINode *, Value *> IVEndValues; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -1879,13 +1883,6 @@ public: unsigned selectInterleaveCount(bool OptForSize, unsigned VF, unsigned LoopCost); - /// \return The most profitable unroll factor. - /// This method finds the best unroll-factor based on register pressure and - /// other parameters. VF and LoopCost are the selected vectorization factor - /// and the cost of the selected VF. - unsigned computeInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - /// \brief A struct that represents some properties of the register usage /// of a loop. struct RegisterUsage { @@ -3424,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // Create phi nodes to merge from the backedge-taken check block. PHINode *BCResumeVal = PHINode::Create( OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator()); - Value *EndValue; + Value *&EndValue = IVEndValues[OrigPhi]; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = CountRoundDown; @@ -3443,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() { // or the value at the end of the vectorized loop. BCResumeVal->addIncoming(EndValue, MiddleBlock); - // Fix up external users of the induction variable. - fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock); - // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); @@ -4116,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() { Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } // end of for each Phi in PHIsToFix. - fixLCSSAPHIs(); - - // Make sure DomTree is updated. + // Update the dominator tree. + // + // FIXME: After creating the structure of the new loop, the dominator tree is + // no longer up-to-date, and it remains that way until we update it + // here. An out-of-date dominator tree is problematic for SCEV, + // because SCEVExpander uses it to guide code generation. The + // vectorizer use SCEVExpanders in several places. Instead, we should + // keep the dominator tree up-to-date as we go. updateAnalysis(); + // Fix-up external users of the induction variables. + for (auto &Entry : *Legal->getInductionVars()) + fixupIVUsers(Entry.first, Entry.second, + getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), + IVEndValues[Entry.first], LoopMiddleBlock); + + fixLCSSAPHIs(); predicateInstructions(); // Remove redundant induction instructions. |