diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 193 |
1 files changed, 126 insertions, 67 deletions
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index b242f100fafff..dc2ad14ae61e5 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -271,7 +271,7 @@ struct PartiallyConstructedSafepointRecord { /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. - Instruction *StatepointToken; + GCStatepointInst *StatepointToken; /// Instruction to which exceptional gc relocates are attached /// Makes it easier to iterate through them during relocationViaAlloca. @@ -381,14 +381,19 @@ static void analyzeParsePointLiveness( dbgs() << " " << V->getName() << " " << *V << "\n"; } if (PrintLiveSetSize) { - dbgs() << "Safepoint For: " << Call->getCalledValue()->getName() << "\n"; + dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n"; dbgs() << "Number live values: " << LiveSet.size() << "\n"; } Result.LiveSet = LiveSet; } +// Returns true is V is a knownBaseResult. static bool isKnownBaseResult(Value *V); +// Returns true if V is a BaseResult that already exists in the IR, i.e. it is +// not created by the findBasePointers algorithm. +static bool isOriginalBaseResult(Value *V); + namespace { /// A single base defining value - An immediate base defining value for an @@ -633,15 +638,20 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { return Def; } +/// This value is a base pointer that is not generated by RS4GC, i.e. it already +/// exists in the code. +static bool isOriginalBaseResult(Value *V) { + // no recursion possible + return !isa<PHINode>(V) && !isa<SelectInst>(V) && + !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) && + !isa<ShuffleVectorInst>(V); +} + /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. static bool isKnownBaseResult(Value *V) { - if (!isa<PHINode>(V) && !isa<SelectInst>(V) && - !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) && - !isa<ShuffleVectorInst>(V)) { - // no recursion possible + if (isOriginalBaseResult(V)) return true; - } if (isa<Instruction>(V) && cast<Instruction>(V)->getMetadata("is_base_value")) { // This is a previously inserted base phi or select. We know @@ -653,6 +663,12 @@ static bool isKnownBaseResult(Value *V) { return false; } +// Returns true if First and Second values are both scalar or both vector. +static bool areBothVectorOrScalar(Value *First, Value *Second) { + return isa<VectorType>(First->getType()) == + isa<VectorType>(Second->getType()); +} + namespace { /// Models the state of a single base defining value in the findBasePointer @@ -762,7 +778,7 @@ static BDVState meetBDVState(const BDVState &LHS, const BDVState &RHS) { static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { Value *Def = findBaseOrBDV(I, Cache); - if (isKnownBaseResult(Def)) + if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I)) return Def; // Here's the rough algorithm: @@ -810,13 +826,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { States.insert({Def, BDVState()}); while (!Worklist.empty()) { Value *Current = Worklist.pop_back_val(); - assert(!isKnownBaseResult(Current) && "why did it get added?"); + assert(!isOriginalBaseResult(Current) && "why did it get added?"); auto visitIncomingValue = [&](Value *InVal) { Value *Base = findBaseOrBDV(InVal, Cache); - if (isKnownBaseResult(Base)) + if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal)) // Known bases won't need new instructions introduced and can be - // ignored safely + // ignored safely. However, this can only be done when InVal and Base + // are both scalar or both vector. Otherwise, we need to find a + // correct BDV for InVal, by creating an entry in the lattice + // (States). return; assert(isExpectedBDVType(Base) && "the only non-base values " "we see should be base defining values"); @@ -853,10 +872,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // Return a phi state for a base defining value. We'll generate a new // base state for known bases and expect to find a cached state otherwise. - auto getStateForBDV = [&](Value *baseValue) { - if (isKnownBaseResult(baseValue)) - return BDVState(baseValue); - auto I = States.find(baseValue); + auto GetStateForBDV = [&](Value *BaseValue, Value *Input) { + if (isKnownBaseResult(BaseValue) && areBothVectorOrScalar(BaseValue, Input)) + return BDVState(BaseValue); + auto I = States.find(BaseValue); assert(I != States.end() && "lookup failed!"); return I->second; }; @@ -873,13 +892,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // much faster. for (auto Pair : States) { Value *BDV = Pair.first; - assert(!isKnownBaseResult(BDV) && "why did it get added?"); + // Only values that do not have known bases or those that have differing + // type (scalar versus vector) from a possible known base should be in the + // lattice. + assert((!isKnownBaseResult(BDV) || + !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) && + "why did it get added?"); // Given an input value for the current instruction, return a BDVState // instance which represents the BDV of that value. auto getStateForInput = [&](Value *V) mutable { Value *BDV = findBaseOrBDV(V, Cache); - return getStateForBDV(BDV); + return GetStateForBDV(BDV, V); }; BDVState NewState; @@ -926,20 +950,26 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { } #endif - // Insert Phis for all conflicts - // TODO: adjust naming patterns to avoid this order of iteration dependency + // Handle all instructions that have a vector BDV, but the instruction itself + // is of scalar type. for (auto Pair : States) { Instruction *I = cast<Instruction>(Pair.first); BDVState State = Pair.second; - assert(!isKnownBaseResult(I) && "why did it get added?"); + auto *BaseValue = State.getBaseValue(); + // Only values that do not have known bases or those that have differing + // type (scalar versus vector) from a possible known base should be in the + // lattice. + assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) && + "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + if (!State.isBase() || !isa<VectorType>(BaseValue->getType())) + continue; // extractelement instructions are a bit special in that we may need to // insert an extract even when we know an exact base for the instruction. // The problem is that we need to convert from a vector base to a scalar // base for the particular indice we're interested in. - if (State.isBase() && isa<ExtractElementInst>(I) && - isa<VectorType>(State.getBaseValue()->getType())) { + if (isa<ExtractElementInst>(I)) { auto *EE = cast<ExtractElementInst>(I); // TODO: In many cases, the new instruction is just EE itself. We should // exploit this, but can't do it here since it would break the invariant @@ -948,7 +978,27 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE); BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); States[I] = BDVState(BDVState::Base, BaseInst); + } else if (!isa<VectorType>(I->getType())) { + // We need to handle cases that have a vector base but the instruction is + // a scalar type (these could be phis or selects or any instruction that + // are of scalar type, but the base can be a vector type). We + // conservatively set this as conflict. Setting the base value for these + // conflicts is handled in the next loop which traverses States. + States[I] = BDVState(BDVState::Conflict); } + } + + // Insert Phis for all conflicts + // TODO: adjust naming patterns to avoid this order of iteration dependency + for (auto Pair : States) { + Instruction *I = cast<Instruction>(Pair.first); + BDVState State = Pair.second; + // Only values that do not have known bases or those that have differing + // type (scalar versus vector) from a possible known base should be in the + // lattice. + assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, State.getBaseValue())) && + "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); // Since we're joining a vector and scalar base, they can never be the // same. As a result, we should always see insert element having reached @@ -987,7 +1037,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { auto *SV = cast<ShuffleVectorInst>(I); UndefValue *VecUndef = UndefValue::get(SV->getOperand(0)->getType()); std::string Name = suffixed_name_or(I, ".base", "base_sv"); - return new ShuffleVectorInst(VecUndef, VecUndef, SV->getOperand(2), + return new ShuffleVectorInst(VecUndef, VecUndef, SV->getShuffleMask(), Name, SV); } }; @@ -1008,7 +1058,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) { Value *BDV = findBaseOrBDV(Input, Cache); Value *Base = nullptr; - if (isKnownBaseResult(BDV)) { + if (isKnownBaseResult(BDV) && areBothVectorOrScalar(BDV, Input)) { Base = BDV; } else { // Either conflict or base. @@ -1029,7 +1079,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { Instruction *BDV = cast<Instruction>(Pair.first); BDVState State = Pair.second; - assert(!isKnownBaseResult(BDV) && "why did it get added?"); + // Only values that do not have known bases or those that have differing + // type (scalar versus vector) from a possible known base should be in the + // lattice. + assert((!isKnownBaseResult(BDV) || + !areBothVectorOrScalar(BDV, State.getBaseValue())) && + "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); if (!State.isConflict()) continue; @@ -1119,7 +1174,11 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { auto *BDV = Pair.first; Value *Base = Pair.second.getBaseValue(); assert(BDV && Base); - assert(!isKnownBaseResult(BDV) && "why did it get added?"); + // Only values that do not have known bases or those that have differing + // type (scalar versus vector) from a possible known base should be in the + // lattice. + assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) && + "why did it get added?"); LLVM_DEBUG( dbgs() << "Updating base value cache" @@ -1238,7 +1297,8 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AL) { +static AttributeList legalizeCallAttributes(LLVMContext &Ctx, + AttributeList AL) { if (AL.isEmpty()) return AL; @@ -1252,7 +1312,6 @@ static AttributeList legalizeCallAttributes(AttributeList AL) { } // Just skip parameter and return attributes for now - LLVMContext &Ctx = AL.getContext(); return AttributeList::get(Ctx, AttributeList::FunctionIndex, AttributeSet::get(Ctx, FnAttrs)); } @@ -1261,16 +1320,14 @@ static AttributeList legalizeCallAttributes(AttributeList AL) { /// statepoint. /// Inputs: /// liveVariables - list of variables to be relocated. -/// liveStart - index of the first live variable. /// basePtrs - base pointers. /// statepointToken - statepoint instruction to which relocates should be /// bound. /// Builder - Llvm IR builder to be used to construct new calls. static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, - const int LiveStart, ArrayRef<Value *> BasePtrs, Instruction *StatepointToken, - IRBuilder<> Builder) { + IRBuilder<> &Builder) { if (LiveVariables.empty()) return; @@ -1295,7 +1352,8 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, auto AS = Ty->getScalarType()->getPointerAddressSpace(); Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); if (auto *VT = dyn_cast<VectorType>(Ty)) - NewTy = VectorType::get(NewTy, VT->getNumElements()); + NewTy = FixedVectorType::get(NewTy, + cast<FixedVectorType>(VT)->getNumElements()); return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, {NewTy}); }; @@ -1307,9 +1365,8 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, for (unsigned i = 0; i < LiveVariables.size(); i++) { // Generate the gc.relocate call and save the result - Value *BaseIdx = - Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i])); - Value *LiveIdx = Builder.getInt32(LiveStart + i); + Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i])); + Value *LiveIdx = Builder.getInt32(i); Type *Ty = LiveVariables[i]->getType(); if (!TypeToDeclMap.count(Ty)) @@ -1431,12 +1488,14 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ uint32_t Flags = uint32_t(StatepointFlags::None); ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end()); - ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(Call); - ArrayRef<Use> TransitionArgs; - if (auto TransitionBundle = - Call->getOperandBundle(LLVMContext::OB_gc_transition)) { + Optional<ArrayRef<Use>> DeoptArgs; + if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt)) + DeoptArgs = Bundle->Inputs; + Optional<ArrayRef<Use>> TransitionArgs; + if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) { + TransitionArgs = Bundle->Inputs; + // TODO: This flag no longer serves a purpose and can be removed later Flags |= uint32_t(StatepointFlags::GCTransition); - TransitionArgs = TransitionBundle->Inputs; } // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls @@ -1459,7 +1518,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ assert(DeoptLowering.equals("live-through") && "Unsupported value!"); } - Value *CallTarget = Call->getCalledValue(); + Value *CallTarget = Call->getCalledOperand(); if (Function *F = dyn_cast<Function>(CallTarget)) { if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) { // Calls to llvm.experimental.deoptimize are lowered to calls to the @@ -1485,7 +1544,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ } // Create the statepoint given all the arguments - Instruction *Token = nullptr; + GCStatepointInst *Token = nullptr; if (auto *CI = dyn_cast<CallInst>(Call)) { CallInst *SPCall = Builder.CreateGCStatepointCall( StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs, @@ -1498,9 +1557,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPCall->setAttributes(legalizeCallAttributes(CI->getAttributes())); + SPCall->setAttributes( + legalizeCallAttributes(CI->getContext(), CI->getAttributes())); - Token = SPCall; + Token = cast<GCStatepointInst>(SPCall); // Put the following gc_result and gc_relocate calls immediately after the // the old call (which we're about to delete) @@ -1524,9 +1584,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPInvoke->setAttributes(legalizeCallAttributes(II->getAttributes())); + SPInvoke->setAttributes( + legalizeCallAttributes(II->getContext(), II->getAttributes())); - Token = SPInvoke; + Token = cast<GCStatepointInst>(SPInvoke); // Generate gc relocates in exceptional path BasicBlock *UnwindBlock = II->getUnwindDest(); @@ -1541,9 +1602,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); Result.UnwindToken = ExceptionalToken; - const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); - CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken, - Builder); + CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder); // Generate gc relocates and returns for normal block BasicBlock *NormalDest = II->getNormalDest(); @@ -1589,8 +1648,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ Result.StatepointToken = Token; // Second, create a gc.relocate for every live variable - const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); - CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder); + CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder); } // Replace an existing gc.statepoint with a new one and a set of gc.relocates @@ -1651,8 +1709,8 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, cast<AllocaInst>(Alloca)->getAllocatedType(), suffixed_name_or(Relocate, ".casted", "")); - StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca); - Store->insertAfter(cast<Instruction>(CastedRelocatedValue)); + new StoreInst(CastedRelocatedValue, Alloca, + cast<Instruction>(CastedRelocatedValue)->getNextNode()); #ifndef NDEBUG VisitedLiveValues.insert(OriginalValue); @@ -1674,8 +1732,8 @@ static void insertRematerializationStores( "Can not find alloca for rematerialized value"); Value *Alloca = AllocaMap[OriginalValue]; - StoreInst *Store = new StoreInst(RematerializedValue, Alloca); - Store->insertAfter(RematerializedValue); + new StoreInst(RematerializedValue, Alloca, + RematerializedValue->getNextNode()); #ifndef NDEBUG VisitedLiveValues.insert(OriginalValue); @@ -1780,8 +1838,7 @@ static void relocationViaAlloca( for (auto *AI : ToClobber) { auto PT = cast<PointerType>(AI->getAllocatedType()); Constant *CPN = ConstantPointerNull::get(PT); - StoreInst *Store = new StoreInst(CPN, AI); - Store->insertBefore(IP); + new StoreInst(CPN, AI, IP); } }; @@ -1843,7 +1900,8 @@ static void relocationViaAlloca( // Emit store for the initial gc value. Store must be inserted after load, // otherwise store will be in alloca's use list and an extra load will be // inserted before it. - StoreInst *Store = new StoreInst(Def, Alloca); + StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false, + DL.getABITypeAlign(Def->getType())); if (Instruction *Inst = dyn_cast<Instruction>(Def)) { if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) { // InvokeInst is a terminator so the store need to be inserted into its @@ -1966,7 +2024,9 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain, "non noop cast is found during rematerialization"); Type *SrcTy = CI->getOperand(0)->getType(); - Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI); + Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, + TargetTransformInfo::TCK_SizeAndLatency, + CI); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) { // Cost of the address calculation @@ -2344,9 +2404,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // That Value* no longer exists and we need to use the new gc_result. // Thankfully, the live set is embedded in the statepoint (and updated), so // we just grab that. - Statepoint Statepoint(Info.StatepointToken); - Live.insert(Live.end(), Statepoint.gc_args_begin(), - Statepoint.gc_args_end()); + Live.insert(Live.end(), Info.StatepointToken->gc_args_begin(), + Info.StatepointToken->gc_args_end()); #ifndef NDEBUG // Do some basic sanity checks on our liveness results before performing // relocation. Relocation can and will turn mistakes in liveness results @@ -2354,7 +2413,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // TODO: It would be nice to test consistency as well assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) && "statepoint must be reachable or liveness is meaningless"); - for (Value *V : Statepoint.gc_args()) { + for (Value *V : Info.StatepointToken->gc_args()) { if (!isa<Instruction>(V)) // Non-instruction values trivial dominate all possible uses continue; @@ -2523,7 +2582,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, auto NeedsRewrite = [&TLI](Instruction &I) { if (const auto *Call = dyn_cast<CallBase>(&I)) - return !callsGCLeafFunction(Call, TLI) && !isStatepoint(Call); + return !callsGCLeafFunction(Call, TLI) && !isa<GCStatepointInst>(Call); return false; }; @@ -2608,10 +2667,10 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, unsigned VF = 0; for (unsigned i = 0; i < I.getNumOperands(); i++) - if (I.getOperand(i)->getType()->isVectorTy()) { + if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) { assert(VF == 0 || - VF == I.getOperand(i)->getType()->getVectorNumElements()); - VF = I.getOperand(i)->getType()->getVectorNumElements(); + VF == cast<FixedVectorType>(OpndVTy)->getNumElements()); + VF = cast<FixedVectorType>(OpndVTy)->getNumElements(); } // It's the vector to scalar traversal through the pointer operand which |