aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp489
1 files changed, 308 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index b795ad3899bc..51e4a5773f3e 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
@@ -281,19 +282,29 @@ struct PartiallyConstructedSafepointRecord {
RematerializedValueMapTy RematerializedValues;
};
+struct RematerizlizationCandidateRecord {
+ // Chain from derived pointer to base.
+ SmallVector<Instruction *, 3> ChainToBase;
+ // Original base.
+ Value *RootOfChain;
+ // Cost of chain.
+ InstructionCost Cost;
+};
+using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;
+
} // end anonymous namespace
static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
Optional<OperandBundleUse> DeoptBundle =
Call->getOperandBundle(LLVMContext::OB_deopt);
- if (!DeoptBundle.hasValue()) {
+ if (!DeoptBundle) {
assert(AllowStatepointWithNoDeoptInfo &&
"Found non-leaf call without deopt info!");
return None;
}
- return DeoptBundle.getValue().Inputs;
+ return DeoptBundle->Inputs;
}
/// Compute the live-in set for every basic block in the function
@@ -385,45 +396,16 @@ static void analyzeParsePointLiveness(
Result.LiveSet = LiveSet;
}
-// Returns true is V is a knownBaseResult.
-static bool isKnownBaseResult(Value *V);
-
-// Returns true if V is a BaseResult that already exists in the IR, i.e. it is
-// not created by the findBasePointers algorithm.
-static bool isOriginalBaseResult(Value *V);
+/// Returns true if V is a known base.
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);
-namespace {
-
-/// A single base defining value - An immediate base defining value for an
-/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
-/// For instructions which have multiple pointer [vector] inputs or that
-/// transition between vector and scalar types, there is no immediate base
-/// defining value. The 'base defining value' for 'Def' is the transitive
-/// closure of this relation stopping at the first instruction which has no
-/// immediate base defining value. The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer.
-struct BaseDefiningValueResult {
- /// Contains the value which is the base defining value.
- Value * const BDV;
+/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
+/// in the cache before.
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases);
- /// True if the base defining value is also known to be an actual base
- /// pointer.
- const bool IsKnownBase;
-
- BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
- : BDV(BDV), IsKnownBase(IsKnownBase) {
-#ifndef NDEBUG
- // Check consistency between new and old means of checking whether a BDV is
- // a base.
- bool MustBeBase = isKnownBaseResult(BDV);
- assert(!MustBeBase || MustBeBase == IsKnownBase);
-#endif
- }
-};
-
-} // end anonymous namespace
-
-static BaseDefiningValueResult findBaseDefiningValue(Value *I);
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
@@ -434,76 +416,122 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the later, the return pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static BaseDefiningValueResult
-findBaseDefiningValueOfVector(Value *I) {
+static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
- if (isa<Argument>(I))
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
+
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<Constant>(I))
+ if (isa<Constant>(I)) {
// Base of constant vector consists only of constant null pointers.
// For reasoning see similar case inside 'findBaseDefiningValue' function.
- return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
- true);
+ auto *CAZ = ConstantAggregateZero::get(I->getType());
+ Cache[I] = CAZ;
+ setKnownBase(CAZ, /* IsKnownBase */true, KnownBases);
+ return CAZ;
+ }
- if (isa<LoadInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<LoadInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<InsertElementInst>(I))
+ if (isa<InsertElementInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
- if (isa<ShuffleVectorInst>(I))
+ if (isa<ShuffleVectorInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
// TODO: There a number of local optimizations which could be applied here
// for particular sufflevector patterns.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
// The behavior of getelementptr instructions is the same for vector and
// non-vector data types.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- return findBaseDefiningValue(GEP->getPointerOperand());
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ // The behavior of freeze instructions is the same for vector and
+ // non-vector data types.
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
// If the pointer comes through a bitcast of a vector of pointers to
// a vector of another type of pointer, then look through the bitcast
- if (auto *BC = dyn_cast<BitCastInst>(I))
- return findBaseDefiningValue(BC->getOperand(0));
+ if (auto *BC = dyn_cast<BitCastInst>(I)) {
+ auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
+ Cache[BC] = BDV;
+ return BDV;
+ }
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
-static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
assert(I->getType()->isPtrOrPtrVectorTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I);
+ return findBaseDefiningValueOfVector(I, Cache, KnownBases);
- if (isa<Argument>(I))
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't an gc value
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
@@ -516,8 +544,10 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// "phi (const1, const2)" or "phi (const, regular gc ptr)".
// See constant.ll file for relevant test cases.
- return BaseDefiningValueResult(
- ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
+ auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
+ Cache[I] = CPN;
+ setKnownBase(CPN, /* IsKnownBase */true, KnownBases);
+ return CPN;
}
// inttoptrs in an integral address space are currently ill-defined. We
@@ -525,8 +555,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// constant rule above and because we don't really have a better semantic
// to give them. Note that the optimizer is always free to insert undefined
// behavior on dynamically dead paths as well.
- if (isa<IntToPtrInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<IntToPtrInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
@@ -539,16 +572,31 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
- return findBaseDefiningValue(Def);
+ auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
+ Cache[CI] = BDV;
+ return BDV;
}
- if (isa<LoadInst>(I))
+ if (isa<LoadInst>(I)) {
// The value loaded is an gc base itself
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// The base of this GEP is the base
- return findBaseDefiningValue(GEP->getPointerOperand());
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -569,24 +617,32 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
llvm_unreachable(
"interaction with the gcroot mechanism is not supported");
case Intrinsic::experimental_gc_get_pointer_base:
- return findBaseDefiningValue(II->getOperand(0));
+ auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
+ Cache[II] = BDV;
+ return BDV;
}
}
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// TODO: I have absolutely no idea how to implement this part yet. It's not
// necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
- if (isa<AtomicCmpXchgInst>(I))
+ if (isa<AtomicCmpXchgInst>(I)) {
// A CAS is effectively a atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -594,8 +650,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// The aggregate ops. Aggregates can either be in the heap or on the
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
- if (isa<ExtractValueInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<ExtractValueInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
@@ -606,6 +665,8 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// substituting gc.get.pointer.base() intrinsic.
bool IsKnownBase =
isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
+ setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases);
+ Cache[I] = I;
// An extractelement produces a base result exactly when it's input does.
// We may need to insert a parallel instruction to extract the appropriate
@@ -615,33 +676,38 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// Note: There a lot of obvious peephole cases here. This are deliberately
// handled after the main base pointer inference algorithm to make writing
// test cases to exercise that code easier.
- return BaseDefiningValueResult(I, IsKnownBase);
+ return I;
// The last two cases here don't return a base pointer. Instead, they
// return a value which dynamically selects from among several base
// derived pointers (each with it's own base potentially). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
- "missing instruction case in findBaseDefiningValing");
- return BaseDefiningValueResult(I, IsKnownBase);
+ "missing instruction case in findBaseDefiningValue");
+ return I;
}
/// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
- Value *&Cached = Cache[I];
- if (!Cached) {
- Cached = findBaseDefiningValue(I).BDV;
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ if (Cache.find(I) == Cache.end()) {
+ auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
+ Cache[I] = BDV;
LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
- << Cached->getName() << "\n");
+ << Cache[I]->getName() << ", is known base = "
+ << KnownBases[I] << "\n");
}
assert(Cache[I] != nullptr);
- return Cached;
+ assert(KnownBases.find(Cache[I]) != KnownBases.end() &&
+ "Cached value must be present in known bases map");
+ return Cache[I];
}
/// Return a base pointer for this value if known. Otherwise, return it's
/// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseDefiningValueCached(I, Cache);
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
auto Found = Cache.find(Def);
if (Found != Cache.end()) {
// Either a base-of relation, or a self reference. Caller must check.
@@ -651,6 +717,7 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
return Def;
}
+#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it already
/// exists in the code.
static bool isOriginalBaseResult(Value *V) {
@@ -659,21 +726,22 @@ static bool isOriginalBaseResult(Value *V) {
!isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
!isa<ShuffleVectorInst>(V);
}
+#endif
-/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
-/// is it known to be a base pointer? Or do we need to continue searching.
-static bool isKnownBaseResult(Value *V) {
- if (isOriginalBaseResult(V))
- return true;
- if (isa<Instruction>(V) &&
- cast<Instruction>(V)->getMetadata("is_base_value")) {
- // This is a previously inserted base phi or select. We know
- // that this is a base value.
- return true;
- }
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
+ auto It = KnownBases.find(V);
+ assert(It != KnownBases.end() && "Value not present in the map");
+ return It->second;
+}
- // We need to keep searching
- return false;
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases) {
+#ifndef NDEBUG
+ auto It = KnownBases.find(V);
+ if (It != KnownBases.end())
+ assert(It->second == IsKnownBase && "Changing already present value");
+#endif
+ KnownBases[V] = IsKnownBase;
}
// Returns true if First and Second values are both scalar or both vector.
@@ -801,10 +869,11 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
/// For gc objects, this is simply itself. On success, returns a value which is
/// the base pointer. (This is reliable and can be used for relocation.) On
/// failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseOrBDV(I, Cache);
+static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseOrBDV(I, Cache, KnownBases);
- if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I))
+ if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
return Def;
// Here's the rough algorithm:
@@ -887,8 +956,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
assert(!isOriginalBaseResult(Current) && "why did it get added?");
auto visitIncomingValue = [&](Value *InVal) {
- Value *Base = findBaseOrBDV(InVal, Cache);
- if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal))
+ Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
+ if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
// Known bases won't need new instructions introduced and can be
// ignored safely. However, this can only be done when InVal and Base
// are both scalar or both vector. Otherwise, we need to find a
@@ -924,12 +993,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
for (auto Pair : States) {
Value *BDV = Pair.first;
auto canPruneInput = [&](Value *V) {
- Value *BDV = findBaseOrBDV(V, Cache);
- if (V->stripPointerCasts() != BDV)
+ // If the input of the BDV is the BDV itself we can prune it. This is
+ // only possible if the BDV is a PHI node.
+ if (V->stripPointerCasts() == BDV)
+ return true;
+ Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
+ if (V->stripPointerCasts() != VBDV)
return false;
// The assumption is that anything not in the state list is
// propagates a base pointer.
- return States.count(BDV) == 0;
+ return States.count(VBDV) == 0;
};
bool CanPrune = true;
@@ -975,13 +1048,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
"why did it get added?");
BDVState NewState(BDV);
visitBDVOperands(BDV, [&](Value *Op) {
- Value *BDV = findBaseOrBDV(Op, Cache);
+ Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
auto OpState = GetStateForBDV(BDV, Op);
NewState.meet(OpState);
});
@@ -1014,8 +1087,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
+ "why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
@@ -1033,6 +1107,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Base, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
} else if (!isa<VectorType>(I->getType())) {
// We need to handle cases that have a vector base but the instruction is
// a scalar type (these could be phis or selects or any instruction that
@@ -1055,7 +1130,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, State.getBaseValue())) &&
+ assert((!isKnownBase(I, KnownBases) ||
+ !areBothVectorOrScalar(I, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1087,6 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Add metadata marking this as a base value
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Conflict, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
}
#ifndef NDEBUG
@@ -1102,7 +1179,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// assured to be able to determine an instruction which produces it's base
// pointer.
auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
- Value *BDV = findBaseOrBDV(Input, Cache);
+ Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
Value *Base = nullptr;
if (!States.count(BDV)) {
assert(areBothVectorOrScalar(BDV, Input));
@@ -1129,7 +1206,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1154,13 +1231,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
Value *OldBase = BlockToValue[InBB];
Value *Base = getBaseForInput(InVal, nullptr);
+
+ // We can't use `stripPointerCasts` instead of this function because
+ // `stripPointerCasts` doesn't handle vectors of pointers.
+ auto StripBitCasts = [](Value *V) -> Value * {
+ while (auto *BC = dyn_cast<BitCastInst>(V))
+ V = BC->getOperand(0);
+ return V;
+ };
// In essence this assert states: the only way two values
// incoming from the same basic block may be different is by
// being different bitcasts of the same value. A cleanup
// that remains TODO is changing findBaseOrBDV to return an
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
- assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
+ assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
"findBaseOrBDV should be pure!");
#endif
}
@@ -1223,8 +1308,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
+ "why did it get added?");
LLVM_DEBUG(
dbgs() << "Updating base value cache"
@@ -1255,9 +1341,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// pointer was a base pointer.
static void findBasePointers(const StatepointLiveSetTy &live,
PointerToBaseTy &PointerToBase, DominatorTree *DT,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
for (Value *ptr : live) {
- Value *base = findBasePointer(ptr, DVCache);
+ Value *base = findBasePointer(ptr, DVCache, KnownBases);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
@@ -1272,7 +1359,8 @@ static void findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
PartiallyConstructedSafepointRecord &result,
- PointerToBaseTy &PointerToBase) {
+ PointerToBaseTy &PointerToBase,
+ IsKnownBaseMapTy &KnownBases) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
// optimization, we can use this to avoid seperately materializing the base
@@ -1286,7 +1374,8 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PotentiallyDerivedPointers.remove(V);
PointerToBase[V] = V;
}
- findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
+ findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
+ KnownBases);
}
/// Given an updated version of the dataflow liveness results, update the
@@ -1349,23 +1438,23 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] =
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
- AttributeList AL) {
- if (AL.isEmpty())
- return AL;
+ AttributeList OrigAL,
+ AttributeList StatepointAL) {
+ if (OrigAL.isEmpty())
+ return StatepointAL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs(Ctx, AL.getFnAttrs());
+ AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttrs()) {
+ for (Attribute A : OrigAL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.removeAttribute(A);
}
// Just skip parameter and return attributes for now
- return AttributeList::get(Ctx, AttributeList::FunctionIndex,
- AttributeSet::get(Ctx, FnAttrs));
+ return StatepointAL.addFnAttributes(Ctx, FnAttrs);
}
/// Helper function to place all gc relocates necessary for the given
@@ -1570,8 +1659,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
assert(DeoptLowering.equals("live-through") && "Unsupported value!");
}
- Value *CallTarget = Call->getCalledOperand();
- if (Function *F = dyn_cast<Function>(CallTarget)) {
+ FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
+ if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
auto IID = F->getIntrinsicID();
if (IID == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1589,8 +1678,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// the same module. This is fine -- we assume the frontend knew what it
// was doing when generating this kind of IR.
CallTarget = F->getParent()
- ->getOrInsertFunction("__llvm_deoptimize", FTy)
- .getCallee();
+ ->getOrInsertFunction("__llvm_deoptimize", FTy);
IsDeoptimize = true;
} else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
@@ -1686,8 +1774,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallTarget =
F->getParent()
- ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
- .getCallee();
+ ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
}
}
@@ -1705,8 +1792,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPCall->setAttributes(
- legalizeCallAttributes(CI->getContext(), CI->getAttributes()));
+ SPCall->setAttributes(legalizeCallAttributes(
+ CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
Token = cast<GCStatepointInst>(SPCall);
@@ -1732,8 +1819,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPInvoke->setAttributes(
- legalizeCallAttributes(II->getContext(), II->getAttributes()));
+ SPInvoke->setAttributes(legalizeCallAttributes(
+ II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
Token = cast<GCStatepointInst>(SPInvoke);
@@ -2071,6 +2158,7 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
+ (void) NumRematerializedValues;
if (!PromotableAllocas.empty()) {
// Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
@@ -2221,27 +2309,25 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
return true;
}
-// From the statepoint live set pick values that are cheaper to recompute then
-// to relocate. Remove this values from the live set, rematerialize them after
-// statepoint and record them in "Info" structure. Note that similar to
-// relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallBase *Call,
- PartiallyConstructedSafepointRecord &Info,
- PointerToBaseTy &PointerToBase,
- TargetTransformInfo &TTI) {
+// Find derived pointers that can be recomputed cheap enough and fill
+// RematerizationCandidates with such candidates.
+static void
+findRematerializationCandidates(PointerToBaseTy PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
- // Record values we are going to delete from this statepoint live set.
- // We can not di this in following loop due to iterator invalidation.
- SmallVector<Value *, 32> LiveValuesToBeDeleted;
+ for (auto P2B : PointerToBase) {
+ auto *Derived = P2B.first;
+ auto *Base = P2B.second;
+ // Consider only derived pointers.
+ if (Derived == Base)
+ continue;
- for (Value *LiveValue: Info.LiveSet) {
- // For each live pointer find its defining chain
+ // For each live pointer find its defining chain.
SmallVector<Instruction *, 3> ChainToBase;
- assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
- findRematerializableChainToBasePointer(ChainToBase,
- LiveValue);
+ findRematerializableChainToBasePointer(ChainToBase, Derived);
// Nothing to do, or chain is too long
if ( ChainToBase.size() == 0 ||
@@ -2250,9 +2336,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[Derived]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2266,33 +2352,61 @@ static void rematerializeLiveValues(CallBase *Call,
// deficiency in the findBasePointer algorithm.
if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
continue;
- // Now that the phi nodes are proved to be the same, assert that
- // findBasePointer's newly generated AlternateRootPhi is present in the
- // liveset of the call.
- assert(Info.LiveSet.count(AlternateRootPhi));
}
- // Compute cost of this chain
+ // Compute cost of this chain.
InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
// of the rematerialized values by later optimization passes. I.e if
// we rematerialized several intersecting chains. Or if original values
// don't have any uses besides this statepoint.
+ // Ok, there is a candidate.
+ RematerizlizationCandidateRecord Record;
+ Record.ChainToBase = ChainToBase;
+ Record.RootOfChain = RootOfChain;
+ Record.Cost = Cost;
+ RematerizationCandidates.insert({ Derived, Record });
+ }
+}
+
+// From the statepoint live set pick values that are cheaper to recompute then
+// to relocate. Remove this values from the live set, rematerialize them after
+// statepoint and record them in "Info" structure. Note that similar to
+// relocated values we don't do any user adjustments here.
+static void rematerializeLiveValues(CallBase *Call,
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
+ // Record values we are going to delete from this statepoint live set.
+ // We can not di this in following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue : Info.LiveSet) {
+ auto It = RematerizationCandidates.find(LiveValue);
+ if (It == RematerizationCandidates.end())
+ continue;
+
+ RematerizlizationCandidateRecord &Record = It->second;
+
+ InstructionCost Cost = Record.Cost;
// For invokes we need to rematerialize each chain twice - for normal and
// for unwind basic blocks. Model this by multiplying cost by two.
- if (isa<InvokeInst>(Call)) {
+ if (isa<InvokeInst>(Call))
Cost *= 2;
- }
- // If it's too expensive - skip it
+
+ // If it's too expensive - skip it.
if (Cost >= RematerializationThreshold)
continue;
// Remove value from the live set
LiveValuesToBeDeleted.push_back(LiveValue);
- // Clone instructions and record them inside "Info" structure
+ // Clone instructions and record them inside "Info" structure.
- // Walk backwards to visit top-most instructions first
+ // For each live pointer find get its defining chain.
+ SmallVector<Instruction *, 3> ChainToBase = Record.ChainToBase;
+ // Walk backwards to visit top-most instructions first.
std::reverse(ChainToBase.begin(), ChainToBase.end());
// Utility function which clones all instructions from "ChainToBase"
@@ -2352,7 +2466,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ InsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2363,9 +2477,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ NormalInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ UnwindInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2380,7 +2494,8 @@ static void rematerializeLiveValues(CallBase *Call,
static bool inlineGetBaseAndOffset(Function &F,
SmallVectorImpl<CallInst *> &Intrinsics,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
auto &Context = F.getContext();
auto &DL = F.getParent()->getDataLayout();
bool Changed = false;
@@ -2389,7 +2504,8 @@ static bool inlineGetBaseAndOffset(Function &F,
switch (Callsite->getIntrinsicID()) {
case Intrinsic::experimental_gc_get_pointer_base: {
Changed = true;
- Value *Base = findBasePointer(Callsite->getOperand(0), DVCache);
+ Value *Base =
+ findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
assert(!DVCache.count(Callsite));
auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
@@ -2404,7 +2520,7 @@ static bool inlineGetBaseAndOffset(Function &F,
case Intrinsic::experimental_gc_get_pointer_offset: {
Changed = true;
Value *Derived = Callsite->getOperand(0);
- Value *Base = findBasePointer(Derived, DVCache);
+ Value *Base = findBasePointer(Derived, DVCache, KnownBases);
assert(!DVCache.count(Callsite));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
@@ -2431,7 +2547,8 @@ static bool inlineGetBaseAndOffset(Function &F,
static bool insertParsePoints(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
SmallVectorImpl<CallBase *> &ToUpdate,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
// Validate the input
std::set<CallBase *> Uniqued;
@@ -2487,7 +2604,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
}
if (PrintBasePointers) {
errs() << "Base Pairs (w/o Relocation):\n";
@@ -2563,11 +2680,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
Holders.clear();
+ // Compute the cost of possible re-materialization of derived pointers.
+ RematCandTy RematerizationCandidates;
+ findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
+
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
+ RematerizationCandidates, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2930,13 +3052,18 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// inlineGetBaseAndOffset() and insertParsePoints().
DefiningValueMapTy DVCache;
+ // Mapping between a base values and a flag indicating whether it's a known
+ // base or not.
+ IsKnownBaseMapTy KnownBases;
+
if (!Intrinsics.empty())
// Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
// live references.
- MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache);
+ MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
if (!ParsePointNeeded.empty())
- MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache);
+ MadeChange |=
+ insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
return MadeChange;
}