Diffstat (limited to 'llvm/lib/Analysis')
87 files changed, 41496 insertions, 6288 deletions
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 1c7678a602d81..fec2415a0e459 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -58,7 +58,7 @@ using namespace llvm; /// Allow disabling BasicAA from the AA results. This is particularly useful /// when testing to isolate a single AA implementation. -static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden, +static cl::opt<bool> DisableBasicAA("disable-basic-aa", cl::Hidden, cl::init(false)); AAResults::AAResults(AAResults &&Arg) @@ -196,8 +196,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call, // Try to refine the mod-ref info further using other API entry points to the // aggregate set of AA results. auto MRB = getModRefBehavior(Call); - if (MRB == FMRB_DoesNotAccessMemory || - MRB == FMRB_OnlyAccessesInaccessibleMem) + if (onlyAccessesInaccessibleMem(MRB)) return ModRefInfo::NoModRef; if (onlyReadsMemory(MRB)) @@ -631,16 +630,14 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, /// Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I -/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up -/// instruction-ordering queries inside the BasicBlock containing \p I. +/// in a BasicBlock. /// FIXME: this is really just shoring-up a deficiency in alias analysis. /// BasicAA isn't willing to spend linear time determining whether an alloca /// was captured before or after this particular call, while we are. However, /// with a smarter AA in place, this test is just wasting compile time. ModRefInfo AAResults::callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, - DominatorTree *DT, - OrderedBasicBlock *OBB) { + DominatorTree *DT) { if (!DT) return ModRefInfo::ModRef; @@ -656,8 +653,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, /* StoreCaptures */ true, I, DT, - /* include Object */ true, - /* OrderedBasicBlock */ OBB)) + /* include Object */ true)) return ModRefInfo::ModRef; unsigned ArgNo = 0; diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 2e44bbd3a8ca5..b1433c579af81 100644 --- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -114,7 +114,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { Stores.insert(&*I); Instruction &Inst = *I; if (auto *Call = dyn_cast<CallBase>(&Inst)) { - Value *Callee = Call->getCalledValue(); + Value *Callee = Call->getCalledOperand(); // Skip actual functions for direct function calls. 
if (!isa<Function>(Callee) && isInterestingPointer(Callee)) Pointers.insert(Callee); diff --git a/llvm/lib/Analysis/AliasAnalysisSummary.cpp b/llvm/lib/Analysis/AliasAnalysisSummary.cpp index 2f3396a44117a..d9c5732da1f33 100644 --- a/llvm/lib/Analysis/AliasAnalysisSummary.cpp +++ b/llvm/lib/Analysis/AliasAnalysisSummary.cpp @@ -1,5 +1,6 @@ #include "AliasAnalysisSummary.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Type.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/lib/Analysis/AliasAnalysisSummary.h b/llvm/lib/Analysis/AliasAnalysisSummary.h index fe75b03cedef0..10d49f9c0113b 100644 --- a/llvm/lib/Analysis/AliasAnalysisSummary.h +++ b/llvm/lib/Analysis/AliasAnalysisSummary.h @@ -37,10 +37,13 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/InstrTypes.h" #include <bitset> namespace llvm { + +class CallBase; +class Value; + namespace cflaa { //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 5cc5ab597ef90..5cc68f05dc0ec 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -677,7 +677,7 @@ void AliasSet::print(raw_ostream &OS) const { I.getPointer()->printAsOperand(OS << "("); if (I.getSize() == LocationSize::unknown()) OS << ", unknown)"; - else + else OS << ", " << I.getSize() << ")"; } } diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp new file mode 100644 index 0000000000000..05fe05a0bd851 --- /dev/null +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -0,0 +1,213 @@ +//===- AssumeBundleQueries.cpp - tool to query assume bundles ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "assume-queries" + +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/DebugCounter.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +STATISTIC(NumAssumeQueries, "Number of Queries into an assume assume bundles"); +STATISTIC( + NumUsefullAssumeQueries, + "Number of Queries into an assume assume bundles that were satisfied"); + +DEBUG_COUNTER(AssumeQueryCounter, "assume-queries-counter", + "Controls which assumes gets created"); + +static bool bundleHasArgument(const CallBase::BundleOpInfo &BOI, unsigned Idx) { + return BOI.End - BOI.Begin > Idx; +} + +static Value *getValueFromBundleOpInfo(CallInst &Assume, + const CallBase::BundleOpInfo &BOI, + unsigned Idx) { + assert(bundleHasArgument(BOI, Idx) && "index out of range"); + return (Assume.op_begin() + BOI.Begin + Idx)->get(); +} + +bool llvm::hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, + StringRef AttrName, uint64_t *ArgVal) { + assert(isa<IntrinsicInst>(AssumeCI) && + "this function is intended to be used on llvm.assume"); + IntrinsicInst &Assume = cast<IntrinsicInst>(AssumeCI); + assert(Assume.getIntrinsicID() == Intrinsic::assume && + "this function is intended to be used on llvm.assume"); + assert(Attribute::isExistingAttribute(AttrName) && + "this attribute doesn't exist"); + assert((ArgVal == nullptr || Attribute::doesAttrKindHaveArgument( + Attribute::getAttrKindFromName(AttrName))) && + "requested value for an attribute that has no argument"); + if (Assume.bundle_op_infos().empty()) + return false; + + for (auto &BOI : Assume.bundle_op_infos()) { + if (BOI.Tag->getKey() != AttrName) + continue; + if (IsOn && (BOI.End - BOI.Begin <= ABA_WasOn || + IsOn != getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn))) + continue; + if (ArgVal) { + assert(BOI.End - BOI.Begin > ABA_Argument); + *ArgVal = + cast<ConstantInt>(getValueFromBundleOpInfo(Assume, BOI, ABA_Argument)) + ->getZExtValue(); + } + return true; + } + return false; +} + +void llvm::fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result) { + IntrinsicInst &Assume = cast<IntrinsicInst>(AssumeCI); + assert(Assume.getIntrinsicID() == Intrinsic::assume && + "this function is intended to be used on llvm.assume"); + for (auto &Bundles : Assume.bundle_op_infos()) { + std::pair<Value *, Attribute::AttrKind> Key{ + nullptr, Attribute::getAttrKindFromName(Bundles.Tag->getKey())}; + if (bundleHasArgument(Bundles, ABA_WasOn)) + Key.first = getValueFromBundleOpInfo(Assume, Bundles, ABA_WasOn); + + if (Key.first == nullptr && Key.second == Attribute::None) + continue; + if (!bundleHasArgument(Bundles, ABA_Argument)) { + Result[Key][&Assume] = {0, 0}; + continue; + } + unsigned Val = cast<ConstantInt>( + getValueFromBundleOpInfo(Assume, Bundles, ABA_Argument)) + ->getZExtValue(); + auto Lookup = Result.find(Key); + if (Lookup == Result.end() || !Lookup->second.count(&Assume)) { + Result[Key][&Assume] = {Val, Val}; + continue; + } + Lookup->second[&Assume].Min = std::min(Val, Lookup->second[&Assume].Min); + Lookup->second[&Assume].Max = std::max(Val, Lookup->second[&Assume].Max); + } +} + +RetainedKnowledge 
+llvm::getKnowledgeFromBundle(CallInst &Assume, + const CallBase::BundleOpInfo &BOI) { + RetainedKnowledge Result; + Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey()); + if (bundleHasArgument(BOI, ABA_WasOn)) + Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn); + auto GetArgOr1 = [&](unsigned Idx) -> unsigned { + if (auto *ConstInt = dyn_cast<ConstantInt>( + getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx))) + return ConstInt->getZExtValue(); + return 1; + }; + if (BOI.End - BOI.Begin > ABA_Argument) + Result.ArgValue = GetArgOr1(0); + if (Result.AttrKind == Attribute::Alignment) + if (BOI.End - BOI.Begin > ABA_Argument + 1) + Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1)); + return Result; +} + +RetainedKnowledge llvm::getKnowledgeFromOperandInAssume(CallInst &AssumeCI, + unsigned Idx) { + IntrinsicInst &Assume = cast<IntrinsicInst>(AssumeCI); + assert(Assume.getIntrinsicID() == Intrinsic::assume && + "this function is intended to be used on llvm.assume"); + CallBase::BundleOpInfo BOI = Assume.getBundleOpInfoForOperand(Idx); + return getKnowledgeFromBundle(AssumeCI, BOI); +} + +bool llvm::isAssumeWithEmptyBundle(CallInst &CI) { + IntrinsicInst &Assume = cast<IntrinsicInst>(CI); + assert(Assume.getIntrinsicID() == Intrinsic::assume && + "this function is intended to be used on llvm.assume"); + return none_of(Assume.bundle_op_infos(), + [](const CallBase::BundleOpInfo &BOI) { + return BOI.Tag->getKey() != IgnoreBundleTag; + }); +} + +static CallInst::BundleOpInfo *getBundleFromUse(const Use *U) { + auto *Intr = dyn_cast<IntrinsicInst>(U->getUser()); + if (!match(U->getUser(), + m_Intrinsic<Intrinsic::assume>(m_Unless(m_Specific(U->get()))))) + return nullptr; + return &Intr->getBundleOpInfoForOperand(U->getOperandNo()); +} + +RetainedKnowledge +llvm::getKnowledgeFromUse(const Use *U, + ArrayRef<Attribute::AttrKind> AttrKinds) { + CallInst::BundleOpInfo* Bundle = getBundleFromUse(U); + if (!Bundle) + return RetainedKnowledge::none(); + RetainedKnowledge RK = + getKnowledgeFromBundle(*cast<CallInst>(U->getUser()), *Bundle); + for (auto Attr : AttrKinds) + if (Attr == RK.AttrKind) + return RK; + return RetainedKnowledge::none(); +} + +RetainedKnowledge +llvm::getKnowledgeForValue(const Value *V, + ArrayRef<Attribute::AttrKind> AttrKinds, + AssumptionCache *AC, + function_ref<bool(RetainedKnowledge, Instruction *, + const CallBase::BundleOpInfo *)> + Filter) { + NumAssumeQueries++; + if (!DebugCounter::shouldExecute(AssumeQueryCounter)) + return RetainedKnowledge::none(); + if (AC) { + for (AssumptionCache::ResultElem &Elem : AC->assumptionsFor(V)) { + IntrinsicInst *II = cast_or_null<IntrinsicInst>(Elem.Assume); + if (!II || Elem.Index == AssumptionCache::ExprResultIdx) + continue; + if (RetainedKnowledge RK = getKnowledgeFromBundle( + *II, II->bundle_op_info_begin()[Elem.Index])) + if (is_contained(AttrKinds, RK.AttrKind) && + Filter(RK, II, &II->bundle_op_info_begin()[Elem.Index])) { + NumUsefullAssumeQueries++; + return RK; + } + } + return RetainedKnowledge::none(); + } + for (const auto &U : V->uses()) { + CallInst::BundleOpInfo* Bundle = getBundleFromUse(&U); + if (!Bundle) + continue; + if (RetainedKnowledge RK = + getKnowledgeFromBundle(*cast<CallInst>(U.getUser()), *Bundle)) + if (is_contained(AttrKinds, RK.AttrKind) && + Filter(RK, cast<Instruction>(U.getUser()), Bundle)) { + NumUsefullAssumeQueries++; + return RK; + } + } + return RetainedKnowledge::none(); +} + +RetainedKnowledge llvm::getKnowledgeValidInContext( + const Value *V, 
ArrayRef<Attribute::AttrKind> AttrKinds, + const Instruction *CtxI, const DominatorTree *DT, AssumptionCache *AC) { + return getKnowledgeForValue(V, AttrKinds, AC, + [&](auto, Instruction *I, auto) { + return isValidAssumeForContext(I, CtxI, DT); + }); +} diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index f4d4a5ac8f88c..16bfd5c75902f 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,7 +42,7 @@ static cl::opt<bool> cl::desc("Enable verification of assumption cache"), cl::init(false)); -SmallVector<WeakTrackingVH, 1> & +SmallVector<AssumptionCache::ResultElem, 1> & AssumptionCache::getOrInsertAffectedValues(Value *V) { // Try using find_as first to avoid creating extra value handles just for the // purpose of doing the lookup. @@ -50,32 +51,39 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) { return AVI->second; auto AVIP = AffectedValues.insert( - {AffectedValueCallbackVH(V, this), SmallVector<WeakTrackingVH, 1>()}); + {AffectedValueCallbackVH(V, this), SmallVector<ResultElem, 1>()}); return AVIP.first->second; } -static void findAffectedValues(CallInst *CI, - SmallVectorImpl<Value *> &Affected) { +static void +findAffectedValues(CallInst *CI, + SmallVectorImpl<AssumptionCache::ResultElem> &Affected) { // Note: This code must be kept in-sync with the code in // computeKnownBitsFromAssume in ValueTracking. - auto AddAffected = [&Affected](Value *V) { + auto AddAffected = [&Affected](Value *V, unsigned Idx = + AssumptionCache::ExprResultIdx) { if (isa<Argument>(V)) { - Affected.push_back(V); + Affected.push_back({V, Idx}); } else if (auto *I = dyn_cast<Instruction>(V)) { - Affected.push_back(I); + Affected.push_back({I, Idx}); // Peek through unary operators to find the source of the condition. 
Value *Op; if (match(I, m_BitCast(m_Value(Op))) || - match(I, m_PtrToInt(m_Value(Op))) || - match(I, m_Not(m_Value(Op)))) { + match(I, m_PtrToInt(m_Value(Op))) || match(I, m_Not(m_Value(Op)))) { if (isa<Instruction>(Op) || isa<Argument>(Op)) - Affected.push_back(Op); + Affected.push_back({Op, Idx}); } } }; + for (unsigned Idx = 0; Idx != CI->getNumOperandBundles(); Idx++) { + if (CI->getOperandBundleAt(Idx).Inputs.size() > ABA_WasOn && + CI->getOperandBundleAt(Idx).getTagName() != IgnoreBundleTag) + AddAffected(CI->getOperandBundleAt(Idx).Inputs[ABA_WasOn], Idx); + } + Value *Cond = CI->getArgOperand(0), *A, *B; AddAffected(Cond); @@ -112,28 +120,44 @@ static void findAffectedValues(CallInst *CI, } void AssumptionCache::updateAffectedValues(CallInst *CI) { - SmallVector<Value *, 16> Affected; + SmallVector<AssumptionCache::ResultElem, 16> Affected; findAffectedValues(CI, Affected); for (auto &AV : Affected) { - auto &AVV = getOrInsertAffectedValues(AV); - if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end()) - AVV.push_back(CI); + auto &AVV = getOrInsertAffectedValues(AV.Assume); + if (std::find_if(AVV.begin(), AVV.end(), [&](ResultElem &Elem) { + return Elem.Assume == CI && Elem.Index == AV.Index; + }) == AVV.end()) + AVV.push_back({CI, AV.Index}); } } void AssumptionCache::unregisterAssumption(CallInst *CI) { - SmallVector<Value *, 16> Affected; + SmallVector<AssumptionCache::ResultElem, 16> Affected; findAffectedValues(CI, Affected); for (auto &AV : Affected) { - auto AVI = AffectedValues.find_as(AV); - if (AVI != AffectedValues.end()) + auto AVI = AffectedValues.find_as(AV.Assume); + if (AVI == AffectedValues.end()) + continue; + bool Found = false; + bool HasNonnull = false; + for (ResultElem &Elem : AVI->second) { + if (Elem.Assume == CI) { + Found = true; + Elem.Assume = nullptr; + } + HasNonnull |= !!Elem.Assume; + if (HasNonnull && Found) + break; + } + assert(Found && "already unregistered or incorrect cache state"); + if (!HasNonnull) AffectedValues.erase(AVI); } AssumeHandles.erase( - remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }), + remove_if(AssumeHandles, [CI](ResultElem &RE) { return CI == RE; }), AssumeHandles.end()); } @@ -177,7 +201,7 @@ void AssumptionCache::scanFunction() { for (BasicBlock &B : F) for (Instruction &II : B) if (match(&II, m_Intrinsic<Intrinsic::assume>())) - AssumeHandles.push_back(&II); + AssumeHandles.push_back({&II, ExprResultIdx}); // Mark the scan as complete. Scanned = true; @@ -196,7 +220,7 @@ void AssumptionCache::registerAssumption(CallInst *CI) { if (!Scanned) return; - AssumeHandles.push_back(CI); + AssumeHandles.push_back({CI, ExprResultIdx}); #ifndef NDEBUG assert(CI->getParent() && diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e852d663c6b4b..74664098ce1d4 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -65,16 +65,16 @@ using namespace llvm; /// Enable analysis of recursive PHI nodes. -static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden, +static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, cl::init(false)); /// By default, even on 32-bit architectures we use 64-bit integers for /// calculations. This will allow us to more-aggressively decompose indexing /// expressions calculated using i64 values (e.g., long long in C) which is /// common enough to worry about. 
-static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b", +static cl::opt<bool> ForceAtLeast64Bits("basic-aa-force-at-least-64b", cl::Hidden, cl::init(true)); -static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits", +static cl::opt<bool> DoubleCalcBits("basic-aa-double-calc-bits", cl::Hidden, cl::init(false)); /// SearchLimitReached / SearchTimes shows how often the limit of @@ -433,7 +433,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, /// an issue, for example, in particular for 32b pointers with negative indices /// that rely on two's complement wrap-arounds for precise alias information /// where the maximum pointer size is 64b. -static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) { +static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) { assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!"); unsigned ShiftBits = Offset.getBitWidth() - PointerSize; return (Offset << ShiftBits).ashr(ShiftBits); @@ -492,7 +492,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); if (!GEPOp) { - if (const auto *Call = dyn_cast<CallBase>(V)) { + if (const auto *PHI = dyn_cast<PHINode>(V)) { + // Look through single-arg phi nodes created by LCSSA. + if (PHI->getNumIncomingValues() == 1) { + V = PHI->getIncomingValue(0); + continue; + } + } else if (const auto *Call = dyn_cast<CallBase>(V)) { // CaptureTracking can know about special capturing properties of some // intrinsics like launder.invariant.group, that can't be expressed with // the attributes, but have properties like returning aliasing pointer. @@ -508,19 +514,6 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, } } - // If it's not a GEP, hand it off to SimplifyInstruction to see if it - // can come up with something. This matches what GetUnderlyingObject does. - if (const Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Get a DominatorTree and AssumptionCache and use them here - // (these are both now available in this function, but this should be - // updated when GetUnderlyingObject is updated). TLI should be - // provided also. - if (const Value *Simplified = - SimplifyInstruction(const_cast<Instruction *>(I), DL)) { - V = Simplified; - continue; - } - Decomposed.Base = V; return false; } @@ -531,6 +524,14 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, return false; } + // Don't attempt to analyze GEPs if index scale is not a compile-time + // constant. + if (isa<ScalableVectorType>(GEPOp->getSourceElementType())) { + Decomposed.Base = V; + Decomposed.HasCompileTimeConstantScale = false; + return false; + } + unsigned AS = GEPOp->getPointerAddressSpace(); // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. 
gep_type_iterator GTI = gep_type_begin(GEPOp); @@ -557,15 +558,16 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, if (CIdx->isZero()) continue; Decomposed.OtherOffset += - (DL.getTypeAllocSize(GTI.getIndexedType()) * - CIdx->getValue().sextOrSelf(MaxPointerSize)) - .sextOrTrunc(MaxPointerSize); + (DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() * + CIdx->getValue().sextOrSelf(MaxPointerSize)) + .sextOrTrunc(MaxPointerSize); continue; } GepHasConstantOffset = false; - APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType())); + APInt Scale(MaxPointerSize, + DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize()); unsigned ZExtBits = 0, SExtBits = 0; // If the integer type is smaller than the pointer size, it is implicitly @@ -723,7 +725,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) { if (Call->onlyReadsMemory()) Min = FMRB_OnlyReadsMemory; else if (Call->doesNotReadMemory()) - Min = FMRB_DoesNotReadMemory; + Min = FMRB_OnlyWritesMemory; if (Call->onlyAccessesArgMemory()) Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); @@ -756,7 +758,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { if (F->onlyReadsMemory()) Min = FMRB_OnlyReadsMemory; else if (F->doesNotReadMemory()) - Min = FMRB_DoesNotReadMemory; + Min = FMRB_OnlyWritesMemory; if (F->onlyAccessesArgMemory()) Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); @@ -960,7 +962,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, } } - // If the call is to malloc or calloc, we can assume that it doesn't + // If the call is malloc/calloc like, we can assume that it doesn't // modify any IR visible value. This is only valid because we assume these // routines do not read values visible in the IR. TODO: Consider special // casing realloc and strdup routines which access only their arguments as @@ -1145,11 +1147,11 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, GEP1->getSourceElementType(), IntermediateIndices); StructType *LastIndexedStruct = dyn_cast<StructType>(Ty); - if (isa<SequentialType>(Ty)) { + if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { // We know that: // - both GEPs begin indexing from the exact same pointer; // - the last indices in both GEPs are constants, indexing into a sequential - // type (array or pointer); + // type (array or vector); // - both GEPs only index through arrays prior to that. // // Because array indices greater than the number of elements are valid in @@ -1157,8 +1159,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't // partially overlap. We also need to check that the loaded size matches // the element size, otherwise we could still have overlap. 
+ Type *LastElementTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0); const uint64_t ElementSize = - DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType()); + DL.getTypeStoreSize(LastElementTy).getFixedSize(); if (V1Size != ElementSize || V2Size != ElementSize) return MayAlias; @@ -1316,12 +1319,20 @@ AliasResult BasicAAResult::aliasGEP( unsigned MaxPointerSize = getMaxPointerSize(DL); DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0); DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0); + DecompGEP1.HasCompileTimeConstantScale = + DecompGEP2.HasCompileTimeConstantScale = true; bool GEP1MaxLookupReached = DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT); bool GEP2MaxLookupReached = DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT); + // Don't attempt to analyze the decomposed GEP if index scale is not a + // compile-time constant. + if (!DecompGEP1.HasCompileTimeConstantScale || + !DecompGEP2.HasCompileTimeConstantScale) + return MayAlias; + APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset; APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset; @@ -1713,6 +1724,10 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, // Other results are not possible. if (Alias == MayAlias) return MayAlias; + // With recursive phis we cannot guarantee that MustAlias/PartialAlias will + // remain valid to all elements and needs to conservatively return MayAlias. + if (isRecursive && Alias != NoAlias) + return MayAlias; // If all sources of the PHI node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. @@ -1978,7 +1993,7 @@ void BasicAAResult::GetIndexDifference( bool BasicAAResult::constantOffsetHeuristic( const SmallVectorImpl<VariableGEPIndex> &VarIndices, - LocationSize MaybeV1Size, LocationSize MaybeV2Size, APInt BaseOffset, + LocationSize MaybeV1Size, LocationSize MaybeV2Size, const APInt &BaseOffset, AssumptionCache *AC, DominatorTree *DT) { if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() || MaybeV2Size == LocationSize::unknown()) @@ -2058,13 +2073,14 @@ char BasicAAWrapperPass::ID = 0; void BasicAAWrapperPass::anchor() {} -INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", false, true) +INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basic-aa", + "Basic Alias Analysis (stateless AA impl)", true, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", false, true) +INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass) +INITIALIZE_PASS_END(BasicAAWrapperPass, "basic-aa", + "Basic Alias Analysis (stateless AA impl)", true, true) FunctionPass *llvm::createBasicAAWrapperPass() { return new BasicAAWrapperPass(); diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 544bd7757ae4a..b9b1fded9de34 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -98,7 +98,7 @@ static GVDAGType getGVDT() { template <> struct GraphTraits<BlockFrequencyInfo *> { using NodeRef = const BasicBlock *; - using ChildIteratorType = succ_const_iterator; + using ChildIteratorType = const_succ_iterator; using nodes_iterator = pointer_iterator<Function::const_iterator>; static NodeRef 
getEntryNode(const BlockFrequencyInfo *G) { @@ -287,6 +287,11 @@ void BlockFrequencyInfo::print(raw_ostream &OS) const { BFI->print(OS); } +void BlockFrequencyInfo::verifyMatch(BlockFrequencyInfo &Other) const { + if (BFI) + BFI->verifyMatch(*Other.BFI); +} + INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 0db6dd04a7e88..e4fda2472b3ac 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -40,6 +40,12 @@ using namespace llvm::bfi_detail; #define DEBUG_TYPE "block-freq" +cl::opt<bool> CheckBFIUnknownBlockQueries( + "check-bfi-unknown-block-queries", + cl::init(false), cl::Hidden, + cl::desc("Check if block frequency is queried for an unknown block " + "for debugging missed BFI updates")); + ScaledNumber<uint64_t> BlockMass::toScaled() const { if (isFull()) return ScaledNumber<uint64_t>(1, 0); @@ -550,8 +556,17 @@ void BlockFrequencyInfoImplBase::finalizeMetrics() { BlockFrequency BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { - if (!Node.isValid()) + if (!Node.isValid()) { +#ifndef NDEBUG + if (CheckBFIUnknownBlockQueries) { + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + OS << "*** Detected BFI query for unknown block " << getBlockName(Node); + report_fatal_error(OS.str()); + } +#endif return 0; + } return Freqs[Node.Index].Integer; } diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index ffba65b5ed5ee..a396b5ad21c6a 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -61,6 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -101,7 +102,7 @@ static const uint32_t LBH_UNLIKELY_WEIGHT = 62; /// /// This is the probability for a branch being taken to a block that terminates /// (eventually) in unreachable. These are predicted as unlikely as possible. -/// All reachable probability will equally share the remaining part. +/// All reachable probability will proportionally share the remaining part. static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); /// Weight for a branch taken going into a cold block. 
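A minimal sketch of how the -check-bfi-unknown-block-queries option added to BlockFrequencyInfoImpl.cpp above might be exercised; the check only fires in asserts-enabled builds (it is guarded by #ifndef NDEBUG), and the pass name and input file here are placeholders rather than anything taken from this patch:

    opt -passes=jump-threading -check-bfi-unknown-block-queries -disable-output input.ll

With the flag set, a query for a block that was never registered with BFI (a missed BFI update) aborts with "*** Detected BFI query for unknown block ..." instead of silently returning a frequency of 0.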
@@ -240,7 +241,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { SmallVector<unsigned, 4> UnreachableEdges; SmallVector<unsigned, 4> ReachableEdges; - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) if (PostDominatedByUnreachable.count(*I)) UnreachableEdges.push_back(I.getSuccessorIndex()); else @@ -250,10 +251,13 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { if (UnreachableEdges.empty()) return false; + SmallVector<BranchProbability, 4> EdgeProbabilities( + BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); if (ReachableEdges.empty()) { BranchProbability Prob(1, UnreachableEdges.size()); for (unsigned SuccIdx : UnreachableEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; + setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -263,10 +267,11 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { ReachableEdges.size(); for (unsigned SuccIdx : UnreachableEdges) - setEdgeProbability(BB, SuccIdx, UnreachableProb); + EdgeProbabilities[SuccIdx] = UnreachableProb; for (unsigned SuccIdx : ReachableEdges) - setEdgeProbability(BB, SuccIdx, ReachableProb); + EdgeProbabilities[SuccIdx] = ReachableProb; + setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -277,7 +282,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const Instruction *TI = BB->getTerminator(); assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI))) + if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) || + isa<InvokeInst>(TI))) return false; MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); @@ -300,19 +306,19 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { SmallVector<unsigned, 2> UnreachableIdxs; SmallVector<unsigned, 2> ReachableIdxs; Weights.reserve(TI->getNumSuccessors()); - for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { + for (unsigned I = 1, E = WeightsNode->getNumOperands(); I != E; ++I) { ConstantInt *Weight = - mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i)); + mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(I)); if (!Weight) return false; assert(Weight->getValue().getActiveBits() <= 32 && "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); - if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1))) - UnreachableIdxs.push_back(i - 1); + if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1))) + UnreachableIdxs.push_back(I - 1); else - ReachableIdxs.push_back(i - 1); + ReachableIdxs.push_back(I - 1); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); @@ -323,47 +329,93 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { if (ScalingFactor > 1) { WeightSum = 0; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - Weights[i] /= ScalingFactor; - WeightSum += Weights[i]; + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { + Weights[I] /= ScalingFactor; + WeightSum += Weights[I]; } } assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); if (WeightSum == 0 || ReachableIdxs.size() == 0) { - for 
(unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - Weights[i] = 1; + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) + Weights[I] = 1; WeightSum = TI->getNumSuccessors(); } // Set the probability. SmallVector<BranchProbability, 2> BP; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - BP.push_back({ Weights[i], static_cast<uint32_t>(WeightSum) }); + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) + BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) }); // Examine the metadata against unreachable heuristic. // If the unreachable heuristic is more strong then we use it for this edge. - if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) { - auto ToDistribute = BranchProbability::getZero(); - auto UnreachableProb = UR_TAKEN_PROB; - for (auto i : UnreachableIdxs) - if (UnreachableProb < BP[i]) { - ToDistribute += BP[i] - UnreachableProb; - BP[i] = UnreachableProb; - } + if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) { + setEdgeProbability(BB, BP); + return true; + } + + auto UnreachableProb = UR_TAKEN_PROB; + for (auto I : UnreachableIdxs) + if (UnreachableProb < BP[I]) { + BP[I] = UnreachableProb; + } - // If we modified the probability of some edges then we must distribute - // the difference between reachable blocks. - if (ToDistribute > BranchProbability::getZero()) { - BranchProbability PerEdge = ToDistribute / ReachableIdxs.size(); - for (auto i : ReachableIdxs) - BP[i] += PerEdge; + // Sum of all edge probabilities must be 1.0. If we modified the probability + // of some edges then we must distribute the introduced difference over the + // reachable blocks. + // + // Proportional distribution: the relation between probabilities of the + // reachable edges is kept unchanged. That is for any reachable edges i and j: + // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] => + // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K + // Where K is independent of i,j. + // newBP[i] == oldBP[i] * K + // We need to find K. + // Make sum of all reachables of the left and right parts: + // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP) + // Sum of newBP must be equal to 1.0: + // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 => + // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP) + // Where sum_of_unreachable(newBP) is what has been just changed. + // Finally: + // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) => + // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP) + BranchProbability NewUnreachableSum = BranchProbability::getZero(); + for (auto I : UnreachableIdxs) + NewUnreachableSum += BP[I]; + + BranchProbability NewReachableSum = + BranchProbability::getOne() - NewUnreachableSum; + + BranchProbability OldReachableSum = BranchProbability::getZero(); + for (auto I : ReachableIdxs) + OldReachableSum += BP[I]; + + if (OldReachableSum != NewReachableSum) { // Anything to dsitribute? + if (OldReachableSum.isZero()) { + // If all oldBP[i] are zeroes then the proportional distribution results + // in all zero probabilities and the error stays big. In this case we + // evenly spread NewReachableSum over the reachable edges. 
+ BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size(); + for (auto I : ReachableIdxs) + BP[I] = PerEdge; + } else { + for (auto I : ReachableIdxs) { + // We use uint64_t to avoid double rounding error of the following + // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum + // The formula is taken from the private constructor + // BranchProbability(uint32_t Numerator, uint32_t Denominator) + uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) * + BP[I].getNumerator(); + uint32_t Div = static_cast<uint32_t>( + divideNearest(Mul, OldReachableSum.getNumerator())); + BP[I] = BranchProbability::getRaw(Div); + } } } - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeProbability(BB, i, BP[i]); + setEdgeProbability(BB, BP); return true; } @@ -386,7 +438,7 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { // Determine which successors are post-dominated by a cold block. SmallVector<unsigned, 4> ColdEdges; SmallVector<unsigned, 4> NormalEdges; - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) if (PostDominatedByColdCall.count(*I)) ColdEdges.push_back(I.getSuccessorIndex()); else @@ -396,10 +448,13 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { if (ColdEdges.empty()) return false; + SmallVector<BranchProbability, 4> EdgeProbabilities( + BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); if (NormalEdges.empty()) { BranchProbability Prob(1, ColdEdges.size()); for (unsigned SuccIdx : ColdEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; + setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -411,10 +466,11 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size())); for (unsigned SuccIdx : ColdEdges) - setEdgeProbability(BB, SuccIdx, ColdProb); + EdgeProbabilities[SuccIdx] = ColdProb; for (unsigned SuccIdx : NormalEdges) - setEdgeProbability(BB, SuccIdx, NormalProb); + EdgeProbabilities[SuccIdx] = NormalProb; + setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -437,19 +493,21 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { assert(CI->getOperand(1)->getType()->isPointerTy()); + BranchProbability TakenProb(PH_TAKEN_WEIGHT, + PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + BranchProbability UntakenProb(PH_NONTAKEN_WEIGHT, + PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + // p != 0 -> isProb = true // p == 0 -> isProb = false // p != q -> isProb = true // p == q -> isProb = false; - unsigned TakenIdx = 0, NonTakenIdx = 1; bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; if (!isProb) - std::swap(TakenIdx, NonTakenIdx); + std::swap(TakenProb, UntakenProb); - BranchProbability TakenProb(PH_TAKEN_WEIGHT, - PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); - setEdgeProbability(BB, TakenIdx, TakenProb); - setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); + setEdgeProbability( + BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); return true; } @@ -614,7 +672,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. 
SmallVector<unsigned, 8> UnlikelyEdges; - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch // irreducible loops. if (L) { @@ -646,18 +704,20 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) + (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); + SmallVector<BranchProbability, 4> EdgeProbabilities( + BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); if (uint32_t numBackEdges = BackEdges.size()) { BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); auto Prob = TakenProb / numBackEdges; for (unsigned SuccIdx : BackEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; } if (uint32_t numInEdges = InEdges.size()) { BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); auto Prob = TakenProb / numInEdges; for (unsigned SuccIdx : InEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; } if (uint32_t numExitingEdges = ExitingEdges.size()) { @@ -665,7 +725,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, Denom); auto Prob = NotTakenProb / numExitingEdges; for (unsigned SuccIdx : ExitingEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; } if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) { @@ -673,9 +733,10 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, Denom); auto Prob = UnlikelyProb / numUnlikelyEdges; for (unsigned SuccIdx : UnlikelyEdges) - setEdgeProbability(BB, SuccIdx, Prob); + EdgeProbabilities[SuccIdx] = Prob; } + setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -786,15 +847,15 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, return false; } - unsigned TakenIdx = 0, NonTakenIdx = 1; - - if (!isProb) - std::swap(TakenIdx, NonTakenIdx); - BranchProbability TakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); - setEdgeProbability(BB, TakenIdx, TakenProb); - setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); + BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT, + ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); + if (!isProb) + std::swap(TakenProb, UntakenProb); + + setEdgeProbability( + BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); return true; } @@ -829,14 +890,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { return false; } - unsigned TakenIdx = 0, NonTakenIdx = 1; - + BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); + BranchProbability UntakenProb(NontakenWeight, TakenWeight + NontakenWeight); if (!isProb) - std::swap(TakenIdx, NonTakenIdx); + std::swap(TakenProb, UntakenProb); - BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); - setEdgeProbability(BB, TakenIdx, TakenProb); - setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); + setEdgeProbability( + BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); return true; } @@ -847,13 +907,23 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) { BranchProbability TakenProb(IH_TAKEN_WEIGHT, IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); - setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb); - setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl()); + setEdgeProbability( + BB, 
SmallVector<BranchProbability, 2>({TakenProb, TakenProb.getCompl()})); return true; } void BranchProbabilityInfo::releaseMemory() { Probs.clear(); + Handles.clear(); +} + +bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); } void BranchProbabilityInfo::print(raw_ostream &OS) const { @@ -862,7 +932,7 @@ void BranchProbabilityInfo::print(raw_ostream &OS) const { // or the function it is currently running over. assert(LastF && "Cannot print prior to running over a function"); for (const auto &BI : *LastF) { - for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; + for (const_succ_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; ++SI) { printEdgeProbability(OS << " ", &BI, *SI); } @@ -881,7 +951,7 @@ BranchProbabilityInfo::getHotSucc(const BasicBlock *BB) const { auto MaxProb = BranchProbability::getZero(); const BasicBlock *MaxSucc = nullptr; - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { const BasicBlock *Succ = *I; auto Prob = getEdgeProbability(BB, Succ); if (Prob > MaxProb) { @@ -914,7 +984,7 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, BranchProbability BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, - succ_const_iterator Dst) const { + const_succ_iterator Dst) const { return getEdgeProbability(Src, Dst.getSuccessorIndex()); } @@ -925,8 +995,10 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { auto Prob = BranchProbability::getZero(); bool FoundProb = false; - for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) + uint32_t EdgeCount = 0; + for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) if (*I == Dst) { + ++EdgeCount; auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex())); if (MapI != Probs.end()) { FoundProb = true; @@ -934,7 +1006,7 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, } } uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src)); - return FoundProb ? Prob : BranchProbability(1, succ_num); + return FoundProb ? Prob : BranchProbability(EdgeCount, succ_num); } /// Set the edge probability for a given edge specified by PredBlock and an @@ -949,6 +1021,28 @@ void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, << "\n"); } +/// Set the edge probability for all edges at once. +void BranchProbabilityInfo::setEdgeProbability( + const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { + assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); + if (Probs.size() == 0) + return; // Nothing to set. + + uint64_t TotalNumerator = 0; + for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { + setEdgeProbability(Src, SuccIdx, Probs[SuccIdx]); + TotalNumerator += Probs[SuccIdx].getNumerator(); + } + + // Because of rounding errors the total probability cannot be checked to be + // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. + // Instead, every single probability in Probs must be as accurate as possible. 
+ // This results in error 1/denominator at most, thus the total absolute error + // should be within Probs.size / BranchProbability::getDenominator. + assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size()); + assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); +} + raw_ostream & BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, @@ -962,15 +1056,16 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, } void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { - for (auto I = Probs.begin(), E = Probs.end(); I != E; ++I) { - auto Key = I->first; - if (Key.first == BB) - Probs.erase(Key); + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + auto MapI = Probs.find(std::make_pair(BB, I.getSuccessorIndex())); + if (MapI != Probs.end()) + Probs.erase(MapI); } } void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + PostDominatorTree *PDT) { LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. @@ -998,10 +1093,15 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } - std::unique_ptr<PostDominatorTree> PDT = - std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); - computePostDominatedByUnreachable(F, PDT.get()); - computePostDominatedByColdCall(F, PDT.get()); + std::unique_ptr<PostDominatorTree> PDTPtr; + + if (!PDT) { + PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); + PDT = PDTPtr.get(); + } + + computePostDominatedByUnreachable(F, PDT); + computePostDominatedByColdCall(F, PDT); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. 
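To make the proportional redistribution in calcMetadataWeights above concrete, here is a small worked example with illustrative numbers that are not taken from the patch. Suppose the !prof weights give BP = {1/2, 3/10, 1/5} and the first successor is post-dominated by unreachable. That edge is capped to UR_TAKEN_PROB (getRaw(1), roughly 2^-31, effectively zero), so NewReachableSum is approximately 1.0 while OldReachableSum = 1/2, giving K close to 2. The reachable edges become roughly 3/5 and 2/5: their 3:2 ratio is preserved and the three probabilities again sum to about 1.0. The vector overload of setEdgeProbability then asserts only that the total numerator lies within Probs.size() of BranchProbability::getDenominator(), which is the slack needed for the at-most-1/denominator rounding error the divideNearest scaling can introduce per edge.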
@@ -1047,6 +1147,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<PostDominatorTreeWrapperPass>(); AU.setPreservesAll(); } @@ -1054,7 +1155,9 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - BPI.calculate(F, LI, &TLI); + PostDominatorTree &PDT = + getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); + BPI.calculate(F, LI, &TLI, &PDT); return false; } @@ -1069,7 +1172,9 @@ AnalysisKey BranchProbabilityAnalysis::Key; BranchProbabilityInfo BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; - BPI.calculate(F, AM.getResult<LoopAnalysis>(F), &AM.getResult<TargetLibraryAnalysis>(F)); + BPI.calculate(F, AM.getResult<LoopAnalysis>(F), + &AM.getResult<TargetLibraryAnalysis>(F), + &AM.getResult<PostDominatorTreeAnalysis>(F)); return BPI; } diff --git a/llvm/lib/Analysis/CFG.cpp b/llvm/lib/Analysis/CFG.cpp index 8215b4ecbb03d..b46a6951dd253 100644 --- a/llvm/lib/Analysis/CFG.cpp +++ b/llvm/lib/Analysis/CFG.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFG.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -31,16 +29,16 @@ void llvm::FindFunctionBackedges(const Function &F, return; SmallPtrSet<const BasicBlock*, 8> Visited; - SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; + SmallVector<std::pair<const BasicBlock *, const_succ_iterator>, 8> VisitStack; SmallPtrSet<const BasicBlock*, 8> InStack; Visited.insert(BB); VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); InStack.insert(BB); do { - std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); + std::pair<const BasicBlock *, const_succ_iterator> &Top = VisitStack.back(); const BasicBlock *ParentBB = Top.first; - succ_const_iterator &I = Top.second; + const_succ_iterator &I = Top.second; bool FoundNew = false; while (I != succ_end(ParentBB)) { diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index 88e7d3bdede1a..cf4afc8cfd9cb 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -18,69 +18,135 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include <algorithm> + using namespace llvm; -static cl::opt<std::string> CFGFuncName( - "cfg-func-name", cl::Hidden, - cl::desc("The name of a function (or its substring)" - " whose CFG is viewed/printed.")); +static cl::opt<std::string> + CFGFuncName("cfg-func-name", cl::Hidden, + cl::desc("The name of a function (or its substring)" + " whose CFG is viewed/printed.")); static cl::opt<std::string> CFGDotFilenamePrefix( "cfg-dot-filename-prefix", cl::Hidden, cl::desc("The prefix used for the CFG dot file names.")); -namespace { - struct CFGViewerLegacyPass : public FunctionPass { - static char ID; // Pass identifcation, replacement for typeid - CFGViewerLegacyPass() : FunctionPass(ID) { - 
initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry()); - } +static cl::opt<bool> HideUnreachablePaths("cfg-hide-unreachable-paths", + cl::init(false)); - bool runOnFunction(Function &F) override { - F.viewCFG(); - return false; - } +static cl::opt<bool> HideDeoptimizePaths("cfg-hide-deoptimize-paths", + cl::init(false)); - void print(raw_ostream &OS, const Module* = nullptr) const override {} +static cl::opt<bool> ShowHeatColors("cfg-heat-colors", cl::init(true), + cl::Hidden, + cl::desc("Show heat colors in CFG")); - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - }; -} +static cl::opt<bool> UseRawEdgeWeight("cfg-raw-weights", cl::init(false), + cl::Hidden, + cl::desc("Use raw weights for labels. " + "Use percentages as default.")); -char CFGViewerLegacyPass::ID = 0; -INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, true) +static cl::opt<bool> + ShowEdgeWeight("cfg-weights", cl::init(false), cl::Hidden, + cl::desc("Show edges labeled with weights")); -PreservedAnalyses CFGViewerPass::run(Function &F, - FunctionAnalysisManager &AM) { - F.viewCFG(); - return PreservedAnalyses::all(); +static void writeCFGToDotFile(Function &F, BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI, uint64_t MaxFreq, + bool CFGOnly = false) { + std::string Filename = + (CFGDotFilenamePrefix + "." + F.getName() + ".dot").str(); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + DOTFuncInfo CFGInfo(&F, BFI, BPI, MaxFreq); + CFGInfo.setHeatColors(ShowHeatColors); + CFGInfo.setEdgeWeights(ShowEdgeWeight); + CFGInfo.setRawEdgeWeights(UseRawEdgeWeight); + + if (!EC) + WriteGraph(File, &CFGInfo, CFGOnly); + else + errs() << " error opening file for writing!"; + errs() << "\n"; } +static void viewCFG(Function &F, const BlockFrequencyInfo *BFI, + const BranchProbabilityInfo *BPI, uint64_t MaxFreq, + bool CFGOnly = false) { + DOTFuncInfo CFGInfo(&F, BFI, BPI, MaxFreq); + CFGInfo.setHeatColors(ShowHeatColors); + CFGInfo.setEdgeWeights(ShowEdgeWeight); + CFGInfo.setRawEdgeWeights(UseRawEdgeWeight); + + ViewGraph(&CFGInfo, "cfg." 
+ F.getName(), CFGOnly); +} namespace { - struct CFGOnlyViewerLegacyPass : public FunctionPass { - static char ID; // Pass identifcation, replacement for typeid - CFGOnlyViewerLegacyPass() : FunctionPass(ID) { - initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry()); - } +struct CFGViewerLegacyPass : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGViewerLegacyPass() : FunctionPass(ID) { + initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); + return false; + } + + void print(raw_ostream &OS, const Module * = nullptr) const override {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.setPreservesAll(); + } +}; +} - bool runOnFunction(Function &F) override { - F.viewCFGOnly(); - return false; - } +char CFGViewerLegacyPass::ID = 0; +INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, + true) - void print(raw_ostream &OS, const Module* = nullptr) const override {} +PreservedAnalyses CFGViewerPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *BFI = &AM.getResult<BlockFrequencyAnalysis>(F); + auto *BPI = &AM.getResult<BranchProbabilityAnalysis>(F); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); + return PreservedAnalyses::all(); +} - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - }; +namespace { +struct CFGOnlyViewerLegacyPass : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGOnlyViewerLegacyPass() : FunctionPass(ID) { + initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + return false; + } + + void print(raw_ostream &OS, const Module * = nullptr) const override {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.setPreservesAll(); + } +}; } char CFGOnlyViewerLegacyPass::ID = 0; @@ -89,84 +155,83 @@ INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only", PreservedAnalyses CFGOnlyViewerPass::run(Function &F, FunctionAnalysisManager &AM) { - F.viewCFGOnly(); + auto *BFI = &AM.getResult<BlockFrequencyAnalysis>(F); + auto *BPI = &AM.getResult<BranchProbabilityAnalysis>(F); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return PreservedAnalyses::all(); } -static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { - if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName)) - return; - std::string Filename = - (CFGDotFilenamePrefix + "." 
+ F.getName() + ".dot").str(); - errs() << "Writing '" << Filename << "'..."; - - std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); - - if (!EC) - WriteGraph(File, (const Function*)&F, CFGOnly); - else - errs() << " error opening file for writing!"; - errs() << "\n"; -} - namespace { - struct CFGPrinterLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - CFGPrinterLegacyPass() : FunctionPass(ID) { - initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - writeCFGToDotFile(F); - return false; - } - - void print(raw_ostream &OS, const Module* = nullptr) const override {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - }; +struct CFGPrinterLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGPrinterLegacyPass() : FunctionPass(ID) { + initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI)); + return false; + } + + void print(raw_ostream &OS, const Module * = nullptr) const override {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.setPreservesAll(); + } +}; } char CFGPrinterLegacyPass::ID = 0; -INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", - false, true) +INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", + "Print CFG of function to 'dot' file", false, true) PreservedAnalyses CFGPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - writeCFGToDotFile(F); + auto *BFI = &AM.getResult<BlockFrequencyAnalysis>(F); + auto *BPI = &AM.getResult<BranchProbabilityAnalysis>(F); + writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI)); return PreservedAnalyses::all(); } namespace { - struct CFGOnlyPrinterLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - CFGOnlyPrinterLegacyPass() : FunctionPass(ID) { - initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - writeCFGToDotFile(F, /*CFGOnly=*/true); - return false; - } - void print(raw_ostream &OS, const Module* = nullptr) const override {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - }; +struct CFGOnlyPrinterLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGOnlyPrinterLegacyPass() : FunctionPass(ID) { + initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + return false; + } + void print(raw_ostream &OS, const Module * = nullptr) const override {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + 
AU.setPreservesAll(); + } +}; } char CFGOnlyPrinterLegacyPass::ID = 0; INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only", - "Print CFG of function to 'dot' file (with no function bodies)", - false, true) + "Print CFG of function to 'dot' file (with no function bodies)", + false, true) PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - writeCFGToDotFile(F, /*CFGOnly=*/true); + auto *BFI = &AM.getResult<BlockFrequencyAnalysis>(F); + auto *BPI = &AM.getResult<BranchProbabilityAnalysis>(F); + writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return PreservedAnalyses::all(); } @@ -175,10 +240,14 @@ PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, /// program, displaying the CFG of the current function. This depends on there /// being a 'dot' and 'gv' program in your path. /// -void Function::viewCFG() const { +void Function::viewCFG() const { viewCFG(false, nullptr, nullptr); } + +void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, + const BranchProbabilityInfo *BPI) const { if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) - return; - ViewGraph(this, "cfg" + getName()); + return; + DOTFuncInfo CFGInfo(this, BFI, BPI, BFI ? getMaxFreq(*this, BFI) : 0); + ViewGraph(&CFGInfo, "cfg" + getName(), ViewCFGOnly); } /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -186,17 +255,45 @@ void Function::viewCFG() const { /// into the nodes, just the label. If you are only interested in the CFG /// this can make the graph smaller. /// -void Function::viewCFGOnly() const { - if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) - return; - ViewGraph(this, "cfg" + getName(), true); +void Function::viewCFGOnly() const { viewCFGOnly(nullptr, nullptr); } + +void Function::viewCFGOnly(const BlockFrequencyInfo *BFI, + const BranchProbabilityInfo *BPI) const { + viewCFG(true, BFI, BPI); } -FunctionPass *llvm::createCFGPrinterLegacyPassPass () { +FunctionPass *llvm::createCFGPrinterLegacyPassPass() { return new CFGPrinterLegacyPass(); } -FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass () { +FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass() { return new CFGOnlyPrinterLegacyPass(); } +void DOTGraphTraits<DOTFuncInfo *>::computeHiddenNodes(const Function *F) { + auto evaluateBB = [&](const BasicBlock *Node) { + if (succ_begin(Node) == succ_end(Node)) { + const Instruction *TI = Node->getTerminator(); + isHiddenBasicBlock[Node] = + (HideUnreachablePaths && isa<UnreachableInst>(TI)) || + (HideDeoptimizePaths && Node->getTerminatingDeoptimizeCall()); + return; + } + isHiddenBasicBlock[Node] = std::all_of( + succ_begin(Node), succ_end(Node), + [this](const BasicBlock *BB) { return isHiddenBasicBlock[BB]; }); + }; + /// The post order traversal iteration is done to know the status of + /// isHiddenBasicBlock for all the successors on the current BB. + for_each(po_begin(&F->getEntryBlock()), po_end(&F->getEntryBlock()), + evaluateBB); +} + +bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node) { + // If both restricting flags are false, all nodes are displayed. 
+ if (!HideUnreachablePaths && !HideDeoptimizePaths) + return false; + if (isHiddenBasicBlock.find(Node) == isHiddenBasicBlock.end()) + computeHiddenNodes(Node->getParent()); + return isHiddenBasicBlock[Node]; +} diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index eb5c96e6eecaf..179f0633df06e 100644 --- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -552,7 +552,7 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias( return std::less<const Value *>()(LHS.Val, RHS.Val); }; #ifdef EXPENSIVE_CHECKS - assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator)); + assert(llvm::is_sorted(Itr->second, Comparator)); #endif auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), OffsetValue{RHS, 0}, Comparator); diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index a0b3f83cca6a6..fd3166f8cd0cd 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -15,14 +15,15 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManagerImpl.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TimeProfiler.h" #include <algorithm> #include <cassert> #include <iterator> @@ -67,16 +68,24 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // a pointer that we can update. LazyCallGraph::SCC *C = &InitialC; - for (auto &Pass : Passes) { - if (DebugLogging) - dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n"; + // Get Function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*C)->getManager(); + for (auto &Pass : Passes) { // Check the PassInstrumentation's BeforePass callbacks before running the // pass, skip its execution completely if asked to (callback returns false). if (!PI.runBeforePass(*Pass, *C)) continue; - PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR); + if (DebugLogging) + dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n"; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(*C, AM, G, UR); + } if (UR.InvalidatedSCCs.count(C)) PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass); @@ -85,6 +94,12 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // Update the SCC if necessary. C = UR.UpdatedC ? UR.UpdatedC : C; + if (UR.UpdatedC) { + // If C is updated, also create a proxy and update FAM inside the result. + auto *ResultFAMCP = + &AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G); + ResultFAMCP->updateFAM(FAM); + } // If the CGSCC pass wasn't able to provide a valid updated SCC, the // current SCC may simply need to be skipped if invalid. @@ -218,23 +233,22 @@ FunctionAnalysisManagerCGSCCProxy::Result FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG) { - // Collect the FunctionAnalysisManager from the Module layer and use that to - // build the proxy result. - // - // This allows us to rely on the FunctionAnalysisMangaerModuleProxy to - // invalidate the function analyses. 
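// The -cfg-hide-unreachable-paths / -cfg-hide-deoptimize-paths logic above marks a leaf
// block hidden based on its terminator (unreachable or a deoptimize call) and an interior
// block hidden only when every successor is already hidden, walking blocks in post order so
// successors are classified first. Below is a minimal standalone sketch of that propagation
// over a plain adjacency list instead of LLVM BasicBlocks; all names here are illustrative,
// not part of the patch.
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy CFG: node -> successors. Leaves flagged "unreachable" model blocks whose terminator
// would make them hidden in the real pass.
struct ToyCFG {
  std::map<std::string, std::vector<std::string>> Succs;
  std::set<std::string> UnreachableLeaves;
};

// Post-order DFS from Entry; a node is hidden if it is an "unreachable" leaf, or if every
// successor is already hidden. A back edge leaves its target defaulted to not-hidden, which
// keeps the result conservative.
static void computeHidden(const ToyCFG &G, const std::string &Entry,
                          std::map<std::string, bool> &Hidden,
                          std::set<std::string> &Visited) {
  if (!Visited.insert(Entry).second)
    return;
  auto It = G.Succs.find(Entry);
  bool IsLeaf = It == G.Succs.end() || It->second.empty();
  if (IsLeaf) {
    Hidden[Entry] = G.UnreachableLeaves.count(Entry) != 0;
    return;
  }
  bool AllHidden = true;
  for (const std::string &S : It->second) {
    computeHidden(G, S, Hidden, Visited); // successors first (post order)
    AllHidden &= Hidden[S];
  }
  Hidden[Entry] = AllHidden;
}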
- auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager(); + // Note: unconditionally getting checking that the proxy exists may get it at + // this point. There are cases when this is being run unnecessarily, but + // it is cheap and having the assertion in place is more valuable. + auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG); Module &M = *C.begin()->getFunction().getParent(); - auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M); - assert(FAMProxy && "The CGSCC pass manager requires that the FAM module " - "proxy is run on the module prior to entering the CGSCC " - "walk."); - - // Note that we special-case invalidation handling of this proxy in the CGSCC - // analysis manager's Module proxy. This avoids the need to do anything - // special here to recompute all of this if ever the FAM's module proxy goes - // away. - return Result(FAMProxy->getManager()); + bool ProxyExists = + MAMProxy.cachedResultExists<FunctionAnalysisManagerModuleProxy>(M); + assert(ProxyExists && + "The CGSCC pass manager requires that the FAM module proxy is run " + "on the module prior to entering the CGSCC walk"); + (void)ProxyExists; + + // We just return an empty result. The caller will use the updateFAM interface + // to correctly register the relevant FunctionAnalysisManager based on the + // context in which this proxy is run. + return Result(); } bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( @@ -244,8 +258,8 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( if (PA.areAllPreserved()) return false; // This is still a valid proxy. - // If this proxy isn't marked as preserved, then even if the result remains - // valid, the key itself may no longer be valid, so we clear everything. + // All updates to preserve valid results are done below, so we don't need to + // invalidate this proxy. // // Note that in order to preserve this proxy, a module pass must ensure that // the FAM has been completely updated to handle the deletion of functions. @@ -257,7 +271,7 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( for (LazyCallGraph::Node &N : C) FAM->clear(N.getFunction(), N.getFunction().getName()); - return true; + return false; } // Directly check if the relevant set is preserved. @@ -306,9 +320,10 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( } // end namespace llvm -/// When a new SCC is created for the graph and there might be function -/// analysis results cached for the functions now in that SCC two forms of -/// updates are required. +/// When a new SCC is created for the graph we first update the +/// FunctionAnalysisManager in the Proxy's result. +/// As there might be function analysis results cached for the functions now in +/// that SCC, two forms of updates are required. /// /// First, a proxy from the SCC to the FunctionAnalysisManager needs to be /// created so that any subsequent invalidation events to the SCC are @@ -320,10 +335,9 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( /// function analyses so that they don't retain stale handles. static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C, LazyCallGraph &G, - CGSCCAnalysisManager &AM) { - // Get the relevant function analysis manager. 
- auto &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, G).getManager(); + CGSCCAnalysisManager &AM, + FunctionAnalysisManager &FAM) { + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, G).updateFAM(FAM); // Now walk the functions in this SCC and invalidate any function analysis // results that might have outer dependencies on an SCC analysis. @@ -387,8 +401,10 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, // If we had a cached FAM proxy originally, we will want to create more of // them for each SCC that was split off. - bool NeedFAMProxy = - AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC) != nullptr; + FunctionAnalysisManager *FAM = nullptr; + if (auto *FAMProxy = + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC)) + FAM = &FAMProxy->getManager(); // We need to propagate an invalidation call to all but the newly current SCC // because the outer pass manager won't do that for us after splitting them. @@ -402,8 +418,8 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, AM.invalidate(*OldC, PA); // Ensure the now-current SCC's function analyses are updated. - if (NeedFAMProxy) - updateNewSCCFunctionAnalyses(*C, G, AM); + if (FAM) + updateNewSCCFunctionAnalyses(*C, G, AM, *FAM); for (SCC &NewC : llvm::reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { @@ -413,8 +429,8 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, LLVM_DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"); // Ensure new SCCs' function analyses are updated. - if (NeedFAMProxy) - updateNewSCCFunctionAnalyses(NewC, G, AM); + if (FAM) + updateNewSCCFunctionAnalyses(NewC, G, AM, *FAM); // Also propagate a normal invalidation to the new SCC as only the current // will get one from the pass manager infrastructure. @@ -423,9 +439,10 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, return C; } -LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( +static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass( LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, - CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) { + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + FunctionAnalysisManager &FAM, bool FunctionPass) { using Node = LazyCallGraph::Node; using Edge = LazyCallGraph::Edge; using SCC = LazyCallGraph::SCC; @@ -443,28 +460,28 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( SmallPtrSet<Node *, 16> RetainedEdges; SmallSetVector<Node *, 4> PromotedRefTargets; SmallSetVector<Node *, 4> DemotedCallTargets; + SmallSetVector<Node *, 4> NewCallEdges; + SmallSetVector<Node *, 4> NewRefEdges; // First walk the function and handle all called functions. We do this first // because if there is a single call edge, whether there are ref edges is // irrelevant. for (Instruction &I : instructions(F)) - if (auto CS = CallSite(&I)) - if (Function *Callee = CS.getCalledFunction()) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *Callee = CB->getCalledFunction()) if (Visited.insert(Callee).second && !Callee->isDeclaration()) { Node &CalleeN = *G.lookup(*Callee); Edge *E = N->lookup(CalleeN); - // FIXME: We should really handle adding new calls. While it will - // make downstream usage more complex, there is no fundamental - // limitation and it will allow passes within the CGSCC to be a bit - // more flexible in what transforms they can do. Until then, we - // verify that new calls haven't been introduced. 
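// The call-graph update walk above buckets every callee discovered in the rewritten function
// into retained edges, promoted ref edges (now real calls), demoted call edges (now only
// references), and genuinely new call/ref edges, with only the CGSCC-pass variant allowed to
// introduce new ones. A small standalone sketch of that bookkeeping over string names rather
// than LazyCallGraph nodes (a simplification, not the patch's data structures):
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

enum class EdgeKind { Call, Ref };

struct EdgeDelta {
  std::set<std::string> Promoted; // was Ref, now observed as a call
  std::set<std::string> Demoted;  // was Call, now observed only as a ref
  std::set<std::string> NewCalls; // no prior edge, observed as a call
  std::set<std::string> NewRefs;  // no prior edge, observed as a ref
};

// OldEdges: callee -> kind of the existing graph edge.
// Observed: callee/kind pairs found by re-scanning the function body.
static EdgeDelta classifyEdges(
    const std::map<std::string, EdgeKind> &OldEdges,
    const std::vector<std::pair<std::string, EdgeKind>> &Observed) {
  EdgeDelta D;
  for (const auto &[Callee, Kind] : Observed) {
    auto It = OldEdges.find(Callee);
    if (It == OldEdges.end())
      (Kind == EdgeKind::Call ? D.NewCalls : D.NewRefs).insert(Callee);
    else if (It->second == EdgeKind::Ref && Kind == EdgeKind::Call)
      D.Promoted.insert(Callee);
    else if (It->second == EdgeKind::Call && Kind == EdgeKind::Ref)
      D.Demoted.insert(Callee);
    // Otherwise the edge is retained unchanged.
  }
  return D;
}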
- assert(E && "No function transformations should introduce *new* " - "call edges! Any new calls should be modeled as " - "promoted existing ref edges!"); + assert((E || !FunctionPass) && + "No function transformations should introduce *new* " + "call edges! Any new calls should be modeled as " + "promoted existing ref edges!"); bool Inserted = RetainedEdges.insert(&CalleeN).second; (void)Inserted; assert(Inserted && "We should never visit a function twice."); - if (!E->isCall()) + if (!E) + NewCallEdges.insert(&CalleeN); + else if (!E->isCall()) PromotedRefTargets.insert(&CalleeN); } @@ -478,19 +495,42 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( auto VisitRef = [&](Function &Referee) { Node &RefereeN = *G.lookup(Referee); Edge *E = N->lookup(RefereeN); - // FIXME: Similarly to new calls, we also currently preclude - // introducing new references. See above for details. - assert(E && "No function transformations should introduce *new* ref " - "edges! Any new ref edges would require IPO which " - "function passes aren't allowed to do!"); + assert((E || !FunctionPass) && + "No function transformations should introduce *new* ref " + "edges! Any new ref edges would require IPO which " + "function passes aren't allowed to do!"); bool Inserted = RetainedEdges.insert(&RefereeN).second; (void)Inserted; assert(Inserted && "We should never visit a function twice."); - if (E->isCall()) + if (!E) + NewRefEdges.insert(&RefereeN); + else if (E->isCall()) DemotedCallTargets.insert(&RefereeN); }; LazyCallGraph::visitReferences(Worklist, Visited, VisitRef); + // Handle new ref edges. + for (Node *RefTarget : NewRefEdges) { + SCC &TargetC = *G.lookupSCC(*RefTarget); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + (void)TargetRC; + // TODO: This only allows trivial edges to be added for now. + assert((RC == &TargetRC || + RC->isAncestorOf(TargetRC)) && "New ref edge is not trivial!"); + RC->insertTrivialRefEdge(N, *RefTarget); + } + + // Handle new call edges. + for (Node *CallTarget : NewCallEdges) { + SCC &TargetC = *G.lookupSCC(*CallTarget); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + (void)TargetRC; + // TODO: This only allows trivial edges to be added for now. + assert((RC == &TargetRC || + RC->isAncestorOf(TargetRC)) && "New call edge is not trivial!"); + RC->insertTrivialCallEdge(N, *CallTarget); + } + // Include synthetic reference edges to known, defined lib functions. for (auto *F : G.getLibFunctions()) // While the list of lib functions doesn't have repeats, don't re-visit @@ -658,7 +698,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // analysis manager, we need to create a proxy in the new current SCC as // the invalidated SCCs had their functions moved. if (HasFunctionAnalysisProxy) - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G); + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G).updateFAM(FAM); // Any analyses cached for this SCC are no longer precise as the shape // has changed by introducing this cycle. 
However, we have taken care to @@ -707,3 +747,18 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( return *C; } + +LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( + LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + FunctionAnalysisManager &FAM) { + return updateCGAndAnalysisManagerForPass(G, InitialC, N, AM, UR, FAM, + /* FunctionPass */ true); +} +LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForCGSCCPass( + LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + FunctionAnalysisManager &FAM) { + return updateCGAndAnalysisManagerForPass(G, InitialC, N, AM, UR, FAM, + /* FunctionPass */ false); +} diff --git a/llvm/lib/Analysis/CallGraph.cpp b/llvm/lib/Analysis/CallGraph.cpp index 8e8a50178518d..55adb454b7338 100644 --- a/llvm/lib/Analysis/CallGraph.cpp +++ b/llvm/lib/Analysis/CallGraph.cpp @@ -10,7 +10,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -30,10 +32,11 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(std::make_unique<CallGraphNode>(nullptr)) { - // Add every function to the call graph. + CallsExternalNode(std::make_unique<CallGraphNode>(this, nullptr)) { + // Add every interesting function to the call graph. for (Function &F : M) - addToCallGraph(&F); + if (!isDbgInfoIntrinsic(F.getIntrinsicID())) + addToCallGraph(&F); } CallGraph::CallGraph(CallGraph &&Arg) @@ -42,6 +45,11 @@ CallGraph::CallGraph(CallGraph &&Arg) CallsExternalNode(std::move(Arg.CallsExternalNode)) { Arg.FunctionMap.clear(); Arg.ExternalCallingNode = nullptr; + + // Update parent CG for all call graph's nodes. + CallsExternalNode->CG = this; + for (auto &P : FunctionMap) + P.second->CG = this; } CallGraph::~CallGraph() { @@ -57,14 +65,30 @@ CallGraph::~CallGraph() { #endif } +bool CallGraph::invalidate(Module &, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<CallGraphAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() || + PAC.preservedSet<CFGAnalyses>()); +} + void CallGraph::addToCallGraph(Function *F) { CallGraphNode *Node = getOrInsertFunction(F); - // If this function has external linkage or has its address taken, anything - // could call it. - if (!F->hasLocalLinkage() || F->hasAddressTaken()) + // If this function has external linkage or has its address taken and + // it is not a callback, then anything could call it. + if (!F->hasLocalLinkage() || + F->hasAddressTaken(nullptr, /*IgnoreCallbackUses=*/true)) ExternalCallingNode->addCalledFunction(nullptr, Node); + populateCallGraphNode(Node); +} + +void CallGraph::populateCallGraphNode(CallGraphNode *Node) { + Function *F = Node->getFunction(); + // If this function is not defined in this translation unit, it could call // anything. 
if (F->isDeclaration() && !F->isIntrinsic()) @@ -82,6 +106,11 @@ void CallGraph::addToCallGraph(Function *F) { Node->addCalledFunction(Call, CallsExternalNode.get()); else if (!Callee->isIntrinsic()) Node->addCalledFunction(Call, getOrInsertFunction(Callee)); + + // Add reference to callback functions. + forEachCallbackFunction(*Call, [=](Function *CB) { + Node->addCalledFunction(nullptr, getOrInsertFunction(CB)); + }); } } } @@ -112,6 +141,16 @@ void CallGraph::print(raw_ostream &OS) const { LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); } #endif +void CallGraph::ReplaceExternalCallEdge(CallGraphNode *Old, + CallGraphNode *New) { + for (auto &CR : ExternalCallingNode->CalledFunctions) + if (CR.second == Old) { + CR.second->DropRef(); + CR.second = New; + CR.second->AddRef(); + } +} + // removeFunctionFromModule - Unlink the function from this module, returning // it. Because this removes the function from the module, the call graph node // is destroyed. This is only valid if the function does not call any other @@ -151,7 +190,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - CGN = std::make_unique<CallGraphNode>(const_cast<Function *>(F)); + CGN = std::make_unique<CallGraphNode>(this, const_cast<Function *>(F)); return CGN.get(); } @@ -187,10 +226,15 @@ LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } void CallGraphNode::removeCallEdgeFor(CallBase &Call) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == &Call) { + if (I->first && *I->first == &Call) { I->second->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); + + // Remove all references to callback functions if there are any. + forEachCallbackFunction(Call, [=](Function *CB) { + removeOneAbstractEdgeTo(CG->getOrInsertFunction(CB)); + }); return; } } @@ -215,7 +259,7 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && CR.first == nullptr) { + if (CR.second == Callee && !CR.first) { Callee->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); @@ -231,11 +275,19 @@ void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall, CallGraphNode *NewNode) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == &Call) { + if (I->first && *I->first == &Call) { I->second->DropRef(); I->first = &NewCall; I->second = NewNode; NewNode->AddRef(); + + // Refresh callback references. 
+ forEachCallbackFunction(Call, [=](Function *CB) { + removeOneAbstractEdgeTo(CG->getOrInsertFunction(CB)); + }); + forEachCallbackFunction(NewCall, [=](Function *CB) { + addCalledFunction(nullptr, CG->getOrInsertFunction(CB)); + }); return; } } diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp index 196ef400bc4e6..91f8029cc326b 100644 --- a/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Intrinsics.h" @@ -225,22 +226,51 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // invalidated and removed. unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + CallGraphNode::iterator CGNEnd = CGN->end(); + + auto RemoveAndCheckForDone = [&](CallGraphNode::iterator I) { + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == CGNEnd; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + return true; + + CGNEnd = CGN->end(); + return false; + }; + // Get the set of call sites currently in the function. - for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + for (CallGraphNode::iterator I = CGN->begin(); I != CGNEnd;) { + // Delete "reference" call records that do not have call instruction. We + // reinsert them as needed later. However, keep them in checking mode. + if (!I->first) { + if (CheckingMode) { + ++I; + continue; + } + if (RemoveAndCheckForDone(I)) + break; + continue; + } + // If this call site is null, then the function pass deleted the call // entirely and the WeakTrackingVH nulled it out. - auto *Call = dyn_cast_or_null<CallBase>(I->first); - if (!I->first || + auto *Call = dyn_cast_or_null<CallBase>(*I->first); + if (!Call || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge // list of the same call. - Calls.count(I->first) || + Calls.count(Call) || // If the call edge is not from a call or invoke, or it is a // instrinsic call, then the function pass RAUW'd a call with // another value. This can happen when constant folding happens // of well known functions etc. - !Call || (Call->getCalledFunction() && Call->getCalledFunction()->isIntrinsic() && Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()))) { @@ -253,28 +283,18 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, else ++NumDirectRemoved; - // Just remove the edge from the set of callees, keep track of whether - // I points to the last element of the vector. - bool WasLast = I + 1 == E; - CGN->removeCallEdge(I); - - // If I pointed to the last element of the vector, we have to bail out: - // iterator checking rejects comparisons of the resultant pointer with - // end. 
- if (WasLast) + if (RemoveAndCheckForDone(I)) break; - E = CGN->end(); continue; } - assert(!Calls.count(I->first) && - "Call site occurs in node multiple times"); + assert(!Calls.count(Call) && "Call site occurs in node multiple times"); if (Call) { Function *Callee = Call->getCalledFunction(); // Ignore intrinsics because they're not really function calls. if (!Callee || !(Callee->isIntrinsic())) - Calls.insert(std::make_pair(I->first, I->second)); + Calls.insert(std::make_pair(Call, I->second)); } ++I; } @@ -292,6 +312,15 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (Callee && Callee->isIntrinsic()) continue; + // If we are not in checking mode, insert potential callback calls as + // references. This is not a requirement but helps to iterate over the + // functions in the right order. + if (!CheckingMode) { + forEachCallbackFunction(*Call, [&](Function *CB) { + CGN->addCalledFunction(nullptr, CG.getOrInsertFunction(CB)); + }); + } + // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from Calls. DenseMap<Value *, CallGraphNode *>::iterator ExistingIt = @@ -549,7 +578,10 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { for (unsigned i = 0; ; ++i) { assert(i != Nodes.size() && "Node not in SCC"); if (Nodes[i] != Old) continue; - Nodes[i] = New; + if (New) + Nodes[i] = New; + else + Nodes.erase(Nodes.begin() + i); break; } @@ -559,6 +591,10 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { CGI->ReplaceNode(Old, New); } +void CallGraphSCC::DeleteNode(CallGraphNode *Old) { + ReplaceNode(Old, /*New=*/nullptr); +} + //===----------------------------------------------------------------------===// // CallGraphSCCPass Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/CallPrinter.cpp b/llvm/lib/Analysis/CallPrinter.cpp index 7246b73bfd4bf..bb447411ec472 100644 --- a/llvm/lib/Analysis/CallPrinter.cpp +++ b/llvm/lib/Analysis/CallPrinter.cpp @@ -14,63 +14,279 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/HeatUtils.h" +#include "llvm/Support/CommandLine.h" #include "llvm/InitializePasses.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" using namespace llvm; +// This option shows static (relative) call counts. 
+// FIXME: +// Need to show real counts when profile data is available +static cl::opt<bool> ShowHeatColors("callgraph-heat-colors", cl::init(false), + cl::Hidden, + cl::desc("Show heat colors in call-graph")); + +static cl::opt<bool> + ShowEdgeWeight("callgraph-show-weights", cl::init(false), cl::Hidden, + cl::desc("Show edges labeled with weights")); + +static cl::opt<bool> + CallMultiGraph("callgraph-multigraph", cl::init(false), cl::Hidden, + cl::desc("Show call-multigraph (do not remove parallel edges)")); + +static cl::opt<std::string> CallGraphDotFilenamePrefix( + "callgraph-dot-filename-prefix", cl::Hidden, + cl::desc("The prefix used for the CallGraph dot file names.")); + namespace llvm { -template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits { +class CallGraphDOTInfo { +private: + Module *M; + CallGraph *CG; + DenseMap<const Function *, uint64_t> Freq; + uint64_t MaxFreq; + +public: + std::function<BlockFrequencyInfo *(Function &)> LookupBFI; + + CallGraphDOTInfo(Module *M, CallGraph *CG, + function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) + : M(M), CG(CG), LookupBFI(LookupBFI) { + MaxFreq = 0; + + for (auto F = M->getFunctionList().begin(); F != M->getFunctionList().end(); ++F) { + uint64_t localSumFreq = 0; + SmallSet<Function *, 16> Callers; + for (User *U : (*F).users()) + if (isa<CallInst>(U)) + Callers.insert(cast<Instruction>(U)->getFunction()); + for (auto iter = Callers.begin() ; iter != Callers.end() ; ++iter) + localSumFreq += getNumOfCalls((**iter), *F); + if (localSumFreq >= MaxFreq) + MaxFreq = localSumFreq; + Freq[&*F] = localSumFreq; + } + if (!CallMultiGraph) + removeParallelEdges(); + } + + Module *getModule() const { return M; } + + CallGraph *getCallGraph() const { return CG; } + + uint64_t getFreq(const Function *F) { return Freq[F]; } + + uint64_t getMaxFreq() { return MaxFreq; } + +private: + void removeParallelEdges() { + for (auto &I : (*CG)) { + CallGraphNode *Node = I.second.get(); + + bool FoundParallelEdge = true; + while (FoundParallelEdge) { + SmallSet<Function *, 16> Visited; + FoundParallelEdge = false; + for (auto CI = Node->begin(), CE = Node->end(); CI != CE; CI++) { + if (!(Visited.insert(CI->second->getFunction())).second) { + FoundParallelEdge = true; + Node->removeCallEdge(CI); + break; + } + } + } + } + } +}; + +template <> +struct GraphTraits<CallGraphDOTInfo *> + : public GraphTraits<const CallGraphNode *> { + static NodeRef getEntryNode(CallGraphDOTInfo *CGInfo) { + // Start at the external node! 
+ return CGInfo->getCallGraph()->getExternalCallingNode(); + } + + typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> + PairTy; + static const CallGraphNode *CGGetValuePtr(const PairTy &P) { + return P.second.get(); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + typedef mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)> + nodes_iterator; + + static nodes_iterator nodes_begin(CallGraphDOTInfo *CGInfo) { + return nodes_iterator(CGInfo->getCallGraph()->begin(), &CGGetValuePtr); + } + static nodes_iterator nodes_end(CallGraphDOTInfo *CGInfo) { + return nodes_iterator(CGInfo->getCallGraph()->end(), &CGGetValuePtr); + } +}; + +template <> +struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(CallGraph *Graph) { return "Call graph"; } + static std::string getGraphName(CallGraphDOTInfo *CGInfo) { + return "Call graph: " + + std::string(CGInfo->getModule()->getModuleIdentifier()); + } - std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { - if (Function *Func = Node->getFunction()) - return Func->getName(); + static bool isNodeHidden(const CallGraphNode *Node) { + if (CallMultiGraph || Node->getFunction()) + return false; + return true; + } + std::string getNodeLabel(const CallGraphNode *Node, + CallGraphDOTInfo *CGInfo) { + if (Node == CGInfo->getCallGraph()->getExternalCallingNode()) + return "external caller"; + if (Node == CGInfo->getCallGraph()->getCallsExternalNode()) + return "external callee"; + + if (Function *Func = Node->getFunction()) + return std::string(Func->getName()); return "external node"; } -}; + static const CallGraphNode *CGGetValuePtr(CallGraphNode::CallRecord P) { + return P.second; + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + typedef mapped_iterator<CallGraphNode::const_iterator, + decltype(&CGGetValuePtr)> + nodes_iterator; + + std::string getEdgeAttributes(const CallGraphNode *Node, nodes_iterator I, + CallGraphDOTInfo *CGInfo) { + if (!ShowEdgeWeight) + return ""; + + Function *Caller = Node->getFunction(); + if (Caller == nullptr || Caller->isDeclaration()) + return ""; + + Function *Callee = (*I)->getFunction(); + if (Callee == nullptr) + return ""; -struct AnalysisCallGraphWrapperPassTraits { - static CallGraph *getGraph(CallGraphWrapperPass *P) { - return &P->getCallGraph(); + uint64_t Counter = getNumOfCalls(*Caller, *Callee); + double Width = + 1 + 2 * (double(Counter) / CGInfo->getMaxFreq()); + std::string Attrs = "label=\"" + std::to_string(Counter) + + "\" penwidth=" + std::to_string(Width); + return Attrs; + } + + std::string getNodeAttributes(const CallGraphNode *Node, + CallGraphDOTInfo *CGInfo) { + Function *F = Node->getFunction(); + if (F == nullptr) + return ""; + std::string attrs = ""; + if (ShowHeatColors) { + uint64_t freq = CGInfo->getFreq(F); + std::string color = getHeatColor(freq, CGInfo->getMaxFreq()); + std::string edgeColor = (freq <= (CGInfo->getMaxFreq() / 2)) + ? 
getHeatColor(0) + : getHeatColor(1); + attrs = "color=\"" + edgeColor + "ff\", style=filled, fillcolor=\"" + + color + "80\""; + } + return attrs; } }; } // end llvm namespace namespace { - -struct CallGraphViewer - : public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, - AnalysisCallGraphWrapperPassTraits> { +// Viewer +class CallGraphViewer : public ModulePass { +public: static char ID; + CallGraphViewer() : ModulePass(ID) {} - CallGraphViewer() - : DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, - AnalysisCallGraphWrapperPassTraits>( - "callgraph", ID) { - initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; }; -struct CallGraphDOTPrinter : public DOTGraphTraitsModulePrinter< - CallGraphWrapperPass, true, CallGraph *, - AnalysisCallGraphWrapperPassTraits> { +void CallGraphViewer::getAnalysisUsage(AnalysisUsage &AU) const { + ModulePass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.setPreservesAll(); +} + +bool CallGraphViewer::runOnModule(Module &M) { + auto LookupBFI = [this](Function &F) { + return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + + CallGraph CG(M); + CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); + + std::string Title = + DOTGraphTraits<CallGraphDOTInfo *>::getGraphName(&CFGInfo); + ViewGraph(&CFGInfo, "callgraph", true, Title); + + return false; +} + +// DOT Printer + +class CallGraphDOTPrinter : public ModulePass { +public: static char ID; + CallGraphDOTPrinter() : ModulePass(ID) {} - CallGraphDOTPrinter() - : DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *, - AnalysisCallGraphWrapperPassTraits>( - "callgraph", ID) { - initializeCallGraphDOTPrinterPass(*PassRegistry::getPassRegistry()); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; }; +void CallGraphDOTPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + ModulePass::getAnalysisUsage(AU); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.setPreservesAll(); +} + +bool CallGraphDOTPrinter::runOnModule(Module &M) { + auto LookupBFI = [this](Function &F) { + return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + + std::string Filename; + if (!CallGraphDotFilenamePrefix.empty()) + Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot"); + else + Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot"); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + CallGraph CG(M); + CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI); + + if (!EC) + WriteGraph(File, &CFGInfo); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + + return false; +} + } // end anonymous namespace char CallGraphViewer::ID = 0; diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 20e2f06540a38..8b101e3b2cc47 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -20,15 +20,30 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" using namespace 
llvm; +/// The default value for MaxUsesToExplore argument. It's relatively small to +/// keep the cost of analysis reasonable for clients like BasicAliasAnalysis, +/// where the results can't be cached. +/// TODO: we should probably introduce a caching CaptureTracking analysis and +/// use it where possible. The caching version can use much higher limit or +/// don't have this cap at all. +static cl::opt<unsigned> +DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden, + cl::desc("Maximal number of uses to explore."), + cl::init(20)); + +unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() { + return DefaultMaxUsesToExplore; +} + CaptureTracker::~CaptureTracker() {} bool CaptureTracker::shouldExplore(const Use *U) { return true; } @@ -76,8 +91,8 @@ namespace { struct CapturesBefore : public CaptureTracker { CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, - bool IncludeI, OrderedBasicBlock *IC) - : OrderedBB(IC), BeforeHere(I), DT(DT), + bool IncludeI) + : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} void tooManyUses() override { Captured = true; } @@ -90,9 +105,7 @@ namespace { return true; // Compute the case where both instructions are inside the same basic - // block. Since instructions in the same BB as BeforeHere are numbered in - // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable' - // which are very expensive for large basic blocks. + // block. if (BB == BeforeHere->getParent()) { // 'I' dominates 'BeforeHere' => not safe to prune. // @@ -102,7 +115,7 @@ namespace { // UseBB == BB, avoid pruning. if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere) return false; - if (!OrderedBB->dominates(BeforeHere, I)) + if (!BeforeHere->comesBefore(I)) return false; // 'BeforeHere' comes before 'I', it's safe to prune if we also @@ -153,7 +166,6 @@ namespace { return true; } - OrderedBasicBlock *OrderedBB; const Instruction *BeforeHere; const DominatorTree *DT; @@ -196,39 +208,35 @@ bool llvm::PointerMayBeCaptured(const Value *V, /// returning the value (or part of it) from the function counts as capturing /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it -/// or not. A ordered basic block \p OBB can be used in order to speed up -/// queries about relative order among instructions in the same basic block. +/// or not. bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI, - OrderedBasicBlock *OBB, unsigned MaxUsesToExplore) { assert(!isa<GlobalValue>(V) && "It doesn't make sense to ask whether a global is captured."); - bool UseNewOBB = OBB == nullptr; if (!DT) return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, MaxUsesToExplore); - if (UseNewOBB) - OBB = new OrderedBasicBlock(I->getParent()); // TODO: See comment in PointerMayBeCaptured regarding what could be done // with StoreCaptures. 
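// The capture walk above now takes its default use budget from the
// -capture-tracking-max-uses-to-explore option and gives up via tooManyUses() once that budget
// is exceeded, conservatively treating the pointer as captured. A standalone sketch of a
// capped worklist walk over a toy def-use graph follows; the fixed budget and string-keyed
// graph are illustrative stand-ins for LLVM's Use lists.
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy def-use graph: value -> values that use it (directly or via casts/GEPs).
using UseGraph = std::map<std::string, std::vector<std::string>>;

// Returns true if the walk had to give up, i.e. the value must conservatively be treated as
// captured, mirroring the tooManyUses() escape hatch.
static bool exploreUsesWithBudget(const UseGraph &G, const std::string &Root,
                                  unsigned MaxUsesToExplore,
                                  std::set<std::string> &Visited) {
  std::vector<std::string> Worklist{Root};
  unsigned Explored = 0;
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    auto It = G.find(V);
    if (It == G.end())
      continue;
    for (const std::string &User : It->second) {
      if (!Visited.insert(User).second)
        continue;                     // each use is visited at most once
      if (++Explored > MaxUsesToExplore)
        return true;                  // budget exhausted: assume captured
      Worklist.push_back(User);       // keep following derived values
    }
  }
  return false;                       // every use examined within budget
}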
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB); + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); PointerMayBeCaptured(V, &CB, MaxUsesToExplore); - - if (UseNewOBB) - delete OBB; return CB.Captured; } void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, unsigned MaxUsesToExplore) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); - SmallVector<const Use *, DefaultMaxUsesToExplore> Worklist; - SmallSet<const Use *, DefaultMaxUsesToExplore> Visited; + if (MaxUsesToExplore == 0) + MaxUsesToExplore = DefaultMaxUsesToExplore; + + SmallVector<const Use *, 20> Worklist; + Worklist.reserve(getDefaultMaxUsesToExploreForCaptureTracking()); + SmallSet<const Use *, 20> Visited; auto AddUses = [&](const Value *V) { unsigned Count = 0; diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp index 627d955c865f2..0b2b6f9bfa460 100644 --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -11,14 +11,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "code-metrics" @@ -171,7 +170,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (InvI->cannotDuplicate()) notDuplicatable = true; - NumInsts += TTI.getUserCost(&I); + NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); } if (isa<ReturnInst>(BB->getTerminator())) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index b32924e6497a6..8c66decaaf58d 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -37,7 +38,9 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -115,8 +118,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // to simplify things. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - Type *SrcIVTy = - VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); + auto *SrcIVTy = FixedVectorType::get( + IntegerType::get(C->getContext(), FPWidth), NumSrcElts); // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); } @@ -152,11 +155,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // If the element types match, IR can fold it. 
unsigned NumDstElt = DestVTy->getNumElements(); - unsigned NumSrcElt = C->getType()->getVectorNumElements(); + unsigned NumSrcElt = cast<VectorType>(C->getType())->getNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - Type *SrcEltTy = C->getType()->getVectorElementType(); + Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType(); Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which @@ -172,8 +175,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); - Type *DestIVTy = - VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); + auto *DestIVTy = FixedVectorType::get( + IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, DL); @@ -185,8 +188,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // it to integer first. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - Type *SrcIVTy = - VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); + auto *SrcIVTy = FixedVectorType::get( + IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); // If IR wasn't able to fold it, bail out. @@ -215,7 +218,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { for (unsigned j = 0; j != Ratio; ++j) { Constant *Src = C->getAggregateElement(SrcElt++); if (Src && isa<UndefValue>(Src)) - Src = Constant::getNullValue(C->getType()->getVectorElementType()); + Src = Constant::getNullValue( + cast<VectorType>(C->getType())->getElementType()); else Src = dyn_cast_or_null<ConstantInt>(Src); if (!Src) // Reject constantexpr elements. @@ -329,10 +333,25 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL) { do { Type *SrcTy = C->getType(); + uint64_t DestSize = DL.getTypeSizeInBits(DestTy); + uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy); + if (SrcSize < DestSize) + return nullptr; + + // Catch the obvious splat cases (since all-zeros can coerce non-integral + // pointers legally). + if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && + !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! + return Constant::getAllOnesValue(DestTy); // If the type sizes are the same and a cast is legal, just directly // cast the constant. - if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { + // But be careful not to coerce non-integral pointers illegally. + if (SrcSize == DestSize && + DL.isNonIntegralPointerType(SrcTy->getScalarType()) == + DL.isNonIntegralPointerType(DestTy->getScalarType())) { Instruction::CastOps Cast = Instruction::BitCast; // If we are going from a pointer to int or vice versa, we spell the cast // differently. 
@@ -361,7 +380,7 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, Constant *ElemC; do { ElemC = C->getAggregateElement(Elem++); - } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()) == 0); + } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero()); C = ElemC; } else { C = C->getAggregateElement(0u); @@ -460,15 +479,18 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || isa<ConstantDataSequential>(C)) { - Type *EltTy = C->getType()->getSequentialElementType(); + uint64_t NumElts; + Type *EltTy; + if (auto *AT = dyn_cast<ArrayType>(C->getType())) { + NumElts = AT->getNumElements(); + EltTy = AT->getElementType(); + } else { + NumElts = cast<VectorType>(C->getType())->getNumElements(); + EltTy = cast<VectorType>(C->getType())->getElementType(); + } uint64_t EltSize = DL.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; - uint64_t NumElts; - if (auto *AT = dyn_cast<ArrayType>(C->getType())) - NumElts = AT->getNumElements(); - else - NumElts = C->getType()->getVectorNumElements(); for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, @@ -501,6 +523,10 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, const DataLayout &DL) { + // Bail out early. Not expect to load from scalable global variable. + if (isa<ScalableVectorType>(LoadTy)) + return nullptr; + auto *PTy = cast<PointerType>(C->getType()); auto *IntType = dyn_cast<IntegerType>(LoadTy); @@ -520,8 +546,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64Ty(C->getContext()); else if (LoadTy->isVectorTy()) { - MapTy = PointerType::getIntNTy(C->getContext(), - DL.getTypeSizeInBits(LoadTy)); + MapTy = PointerType::getIntNTy( + C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize()); } else return nullptr; @@ -561,7 +587,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return nullptr; int64_t Offset = OffsetAI.getSExtValue(); - int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType()); + int64_t InitializerSize = + DL.getTypeAllocSize(GV->getInitializer()->getType()).getFixedSize(); // If we're not accessing anything in this constant, the result is undefined. if (Offset <= -1 * static_cast<int64_t>(BytesLoaded)) @@ -734,8 +761,7 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, return Op1; } - Known0.Zero |= Known1.Zero; - Known0.One &= Known1.One; + Known0 &= Known1; if (Known0.isConstant()) return ConstantInt::get(Op0->getType(), Known0.getConstant()); } @@ -794,10 +820,7 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, Constant *C = ConstantExpr::getGetElementPtr( SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex); - if (Constant *Folded = ConstantFoldConstant(C, DL, TLI)) - C = Folded; - - return C; + return ConstantFoldConstant(C, DL, TLI); } /// Strip the pointer casts, but preserve the address space information. 
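// The SymbolicallyEvaluateBinop hunk above folds the replaced pair of mask updates into
// KnownBits' compound AND operator: for the bitwise AND of two values, a bit is known one only
// where both inputs are known one, and known zero where either input is known zero; if that
// leaves every bit known, the whole AND folds to a constant. A tiny standalone model of that
// rule (the struct below is a stand-in, not LLVM's KnownBits):
#include <cstdint>

// Minimal stand-in for known-bits information on a 64-bit value.
struct ToyKnownBits {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

// Known bits of (A & B): one only where both are known one, zero where either is known zero.
static ToyKnownBits knownAnd(const ToyKnownBits &A, const ToyKnownBits &B) {
  ToyKnownBits R;
  R.One = A.One & B.One;
  R.Zero = A.Zero | B.Zero;
  return R;
}

// If every bit ends up known, the AND folds to the constant formed by the known-one bits.
static bool foldsToConstant(const ToyKnownBits &K, uint64_t &Out) {
  if ((K.Zero | K.One) != ~uint64_t(0))
    return false;
  Out = K.One;
  return true;
}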
@@ -828,7 +851,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, Type *SrcElemTy = GEP->getSourceElementType(); Type *ResElemTy = GEP->getResultElementType(); Type *ResTy = GEP->getType(); - if (!SrcElemTy->isSized()) + if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy)) return nullptr; if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, @@ -857,9 +880,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); Res = ConstantExpr::getSub(Res, CE->getOperand(1)); Res = ConstantExpr::getIntToPtr(Res, ResTy); - if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) - Res = FoldedRes; - return Res; + return ConstantFoldConstant(Res, DL, TLI); } } return nullptr; @@ -932,11 +953,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // Only handle pointers to sized types, not pointers to functions. if (!Ty->isSized()) return nullptr; - } else if (auto *ATy = dyn_cast<SequentialType>(Ty)) { - Ty = ATy->getElementType(); } else { - // We've reached some non-indexable type. - break; + Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0); + if (!NextTy) + break; + Ty = NextTy; } // Determine which element of the array the offset points into. @@ -1062,7 +1083,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: - return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); + return ConstantExpr::getShuffleVector( + Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask()); } } @@ -1079,23 +1101,19 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI, SmallDenseMap<Constant *, Constant *> &FoldedOps) { if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C)) - return nullptr; + return const_cast<Constant *>(C); SmallVector<Constant *, 8> Ops; - for (const Use &NewU : C->operands()) { - auto *NewC = cast<Constant>(&NewU); + for (const Use &OldU : C->operands()) { + Constant *OldC = cast<Constant>(&OldU); + Constant *NewC = OldC; // Recursively fold the ConstantExpr's operands. If we have already folded // a ConstantExpr, we don't have to process it again. - if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) { - auto It = FoldedOps.find(NewC); + if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) { + auto It = FoldedOps.find(OldC); if (It == FoldedOps.end()) { - if (auto *FoldedC = - ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) { - FoldedOps.insert({NewC, FoldedC}); - NewC = FoldedC; - } else { - FoldedOps.insert({NewC, NewC}); - } + NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps); + FoldedOps.insert({OldC, NewC}); } else { NewC = It->second; } @@ -1136,8 +1154,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, if (!C) return nullptr; // Fold the PHI's operands. - if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps)) - C = FoldedC; + C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) @@ -1159,9 +1176,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, for (const Use &OpU : I->operands()) { auto *Op = cast<Constant>(&OpU); // Fold the Instruction's operands. 
- if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps)) - Op = FoldedOp; - + Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps); Ops.push_back(Op); } @@ -1400,41 +1415,19 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { - if (Call->isNoBuiltin() || Call->isStrictFP()) + if (Call->isNoBuiltin()) return false; switch (F->getIntrinsicID()) { - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - case Intrinsic::minimum: - case Intrinsic::maximum: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::pow: - case Intrinsic::powi: + // Operations that do not operate on floating-point numbers and do not depend + // on the FP environment can be folded even in strictfp functions. case Intrinsic::bswap: case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::fshl: case Intrinsic::fshr: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::copysign: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: - case Intrinsic::round: case Intrinsic::masked_load: case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: @@ -1448,9 +1441,49 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::usub_sat: case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: + case Intrinsic::bitreverse: + case Intrinsic::is_constant: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_umax: + return true; + + // Floating point operations cannot be folded in strictfp functions in the + // general case. They can be folded if the FP environment is known to the compiler. + case Intrinsic::minnum: + case Intrinsic::maxnum: + case Intrinsic::minimum: + case Intrinsic::maximum: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::fma: + case Intrinsic::fmuladd: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: - case Intrinsic::bitreverse: + case Intrinsic::amdgcn_cos: + case Intrinsic::amdgcn_cubeid: + case Intrinsic::amdgcn_cubema: + case Intrinsic::amdgcn_cubesc: + case Intrinsic::amdgcn_cubetc: + case Intrinsic::amdgcn_fmul_legacy: + case Intrinsic::amdgcn_fract: + case Intrinsic::amdgcn_ldexp: + case Intrinsic::amdgcn_sin: + // The intrinsics below depend on the rounding mode in MXCSR. case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse_cvttss2si: @@ -1475,14 +1508,37 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::x86_avx512_vcvtsd2usi64: case Intrinsic::x86_avx512_cvttsd2usi: case Intrinsic::x86_avx512_cvttsd2usi64: - case Intrinsic::is_constant: + return !Call->isStrictFP(); + + // Sign operations are actually bitwise operations; they do not raise + // exceptions even for SNaNs. + case Intrinsic::fabs: + case Intrinsic::copysign: + // Non-constrained variants of rounding operations imply the default FP + // environment, so they can be folded in any case. + case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::round: + case Intrinsic::roundeven: + case Intrinsic::trunc: + case Intrinsic::nearbyint: + case Intrinsic::rint: + // Constrained intrinsics can be folded if the FP environment is known + // to the compiler. + case Intrinsic::experimental_constrained_ceil: + case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_round: + case Intrinsic::experimental_constrained_roundeven: + case Intrinsic::experimental_constrained_trunc: + case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_rint: return true; default: return false; case Intrinsic::not_intrinsic: break; } - if (!F->hasName()) + if (!F->hasName() || Call->isStrictFP()) return false; // In these cases, the check of the length is required. We don't want to @@ -1517,7 +1573,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case 'p': return Name == "pow" || Name == "powf"; case 'r': - return Name == "rint" || Name == "rintf" || + return Name == "remainder" || Name == "remainderf" || + Name == "rint" || Name == "rintf" || Name == "round" || Name == "roundf"; case 's': return Name == "sin" || Name == "sinf" || @@ -1616,6 +1673,53 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V, return GetConstantFoldFPValue(V, Ty); } +Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { + FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType()); + if (!VT) + return nullptr; + ConstantInt *CI = dyn_cast<ConstantInt>(Op->getAggregateElement(0U)); + if (!CI) + return nullptr; + APInt Acc = CI->getValue(); + + for (unsigned I = 1; I < VT->getNumElements(); I++) { + if (!(CI = dyn_cast<ConstantInt>(Op->getAggregateElement(I)))) + return nullptr; + const APInt &X = CI->getValue(); + switch (IID) { + case Intrinsic::experimental_vector_reduce_add: + Acc = Acc + X; + break; + case Intrinsic::experimental_vector_reduce_mul: + Acc = Acc * X; + break; + case Intrinsic::experimental_vector_reduce_and: + Acc = Acc & X; + break; + case Intrinsic::experimental_vector_reduce_or: + Acc = Acc | X; + break; + case Intrinsic::experimental_vector_reduce_xor: + Acc = Acc ^ X; + break; + case Intrinsic::experimental_vector_reduce_smin: + Acc = APIntOps::smin(Acc, X); + break; + case Intrinsic::experimental_vector_reduce_smax: + Acc = APIntOps::smax(Acc, X); + break; + case Intrinsic::experimental_vector_reduce_umin: + Acc = APIntOps::umin(Acc, X); + break; + case Intrinsic::experimental_vector_reduce_umax: + Acc = APIntOps::umax(Acc, X); + break; + } + } + + return ConstantInt::get(Op->getContext(), Acc); +} + /// Attempt to fold an SSE floating point to integer conversion of a constant /// floating point. If roundTowardZero is false, the default IEEE rounding is /// used (toward nearest, ties to even).
This matches the behavior of the @@ -1756,6 +1860,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::get(Ty->getContext(), U); } + if (IntrinsicID == Intrinsic::roundeven) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } + if (IntrinsicID == Intrinsic::ceil) { U.roundToIntegral(APFloat::rmTowardPositive); return ConstantFP::get(Ty->getContext(), U); @@ -1776,10 +1885,70 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::get(Ty->getContext(), U); } + if (IntrinsicID == Intrinsic::amdgcn_fract) { + // The v_fract instruction behaves like the OpenCL spec, which defines + // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is + // there to prevent fract(-small) from returning 1.0. It returns the + // largest positive floating-point number less than 1.0." + APFloat FloorU(U); + FloorU.roundToIntegral(APFloat::rmTowardNegative); + APFloat FractU(U - FloorU); + APFloat AlmostOne(U.getSemantics(), 1); + AlmostOne.next(/*nextDown*/ true); + return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); + } + + // Rounding operations (floor, trunc, ceil, round and nearbyint) do not + // raise FP exceptions, unless the argument is signaling NaN. + + Optional<APFloat::roundingMode> RM; + switch (IntrinsicID) { + default: + break; + case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_rint: { + auto CI = cast<ConstrainedFPIntrinsic>(Call); + RM = CI->getRoundingMode(); + if (!RM || RM.getValue() == RoundingMode::Dynamic) + return nullptr; + break; + } + case Intrinsic::experimental_constrained_round: + RM = APFloat::rmNearestTiesToAway; + break; + case Intrinsic::experimental_constrained_ceil: + RM = APFloat::rmTowardPositive; + break; + case Intrinsic::experimental_constrained_floor: + RM = APFloat::rmTowardNegative; + break; + case Intrinsic::experimental_constrained_trunc: + RM = APFloat::rmTowardZero; + break; + } + if (RM) { + auto CI = cast<ConstrainedFPIntrinsic>(Call); + if (U.isFinite()) { + APFloat::opStatus St = U.roundToIntegral(*RM); + if (IntrinsicID == Intrinsic::experimental_constrained_rint && + St == APFloat::opInexact) { + Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); + if (EB && *EB == fp::ebStrict) + return nullptr; + } + } else if (U.isSignaling()) { + Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); + if (EB && *EB != fp::ebIgnore) + return nullptr; + U = APFloat::getQNaN(U.getSemantics()); + } + return ConstantFP::get(Ty->getContext(), U); + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. - if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) + if (!U.isFinite()) return nullptr; /// Currently APFloat versions of these functions do not exist, so we use @@ -1809,6 +1978,26 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFoldFP(cos, V, Ty); case Intrinsic::sqrt: return ConstantFoldFP(sqrt, V, Ty); + case Intrinsic::amdgcn_cos: + case Intrinsic::amdgcn_sin: + if (V < -256.0 || V > 256.0) + // The gfx8 and gfx9 architectures handle arguments outside the range + // [-256, 256] differently. This should be a rare case so bail out + // rather than trying to handle the difference. 
+ return nullptr; + bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos; + double V4 = V * 4.0; + if (V4 == floor(V4)) { + // Force exact results for quarter-integer inputs. + const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 }; + V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3]; + } else { + if (IsCos) + V = cos(V * 2.0 * numbers::pi); + else + V = sin(V * 2.0 * numbers::pi); + } + return GetConstantFoldFPValue(V, Ty); } if (!TLI) @@ -1990,12 +2179,40 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, } } + if (isa<ConstantAggregateZero>(Operands[0])) { + switch (IntrinsicID) { + default: break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_umax: + return ConstantInt::get(Ty, 0); + } + } + // Support ConstantVector in case we have an Undef in the top. if (isa<ConstantVector>(Operands[0]) || isa<ConstantDataVector>(Operands[0])) { auto *Op = cast<Constant>(Operands[0]); switch (IntrinsicID) { default: break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_umax: + if (Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op)) + return C; + break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse2_cvtsd2si: @@ -2074,6 +2291,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantFP::get(Ty->getContext(), maximum(C1, C2)); } + if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + // The legacy behaviour is that multiplying zero by anything, even NaN + // or infinity, gives +0.0. + if (C1.isZero() || C2.isZero()) + return ConstantFP::getNullValue(Ty); + return ConstantFP::get(Ty->getContext(), C1 * C2); + } + if (!TLI) return nullptr; @@ -2097,6 +2324,14 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantFP::get(Ty->getContext(), V); } break; + case LibFunc_remainder: + case LibFunc_remainderf: + if (TLI->has(Func)) { + APFloat V = Op1->getValueAPF(); + if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF())) + return ConstantFP::get(Ty->getContext(), V); + } + break; case LibFunc_atan2: case LibFunc_atan2f: case LibFunc_atan2_finite: @@ -2118,6 +2353,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantFP::get(Ty->getContext(), APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); + + if (IntrinsicID == Intrinsic::amdgcn_ldexp) { + // FIXME: Should flush denorms depending on FP mode, but that's ignored + // everywhere else. 
+ + // scalbn is equivalent to ldexp with float radix 2 + APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), Result); + } } return nullptr; } @@ -2275,6 +2520,61 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return nullptr; } +static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID, + const APFloat &S0, + const APFloat &S1, + const APFloat &S2) { + unsigned ID; + const fltSemantics &Sem = S0.getSemantics(); + APFloat MA(Sem), SC(Sem), TC(Sem); + if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) { + if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) { + // S2 < 0 + ID = 5; + SC = -S0; + } else { + ID = 4; + SC = S0; + } + MA = S2; + TC = -S1; + } else if (abs(S1) >= abs(S0)) { + if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) { + // S1 < 0 + ID = 3; + TC = -S2; + } else { + ID = 2; + TC = S2; + } + MA = S1; + SC = S0; + } else { + if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) { + // S0 < 0 + ID = 1; + SC = S2; + } else { + ID = 0; + SC = -S2; + } + MA = S0; + TC = -S1; + } + switch (IntrinsicID) { + default: + llvm_unreachable("unhandled amdgcn cube intrinsic"); + case Intrinsic::amdgcn_cubeid: + return APFloat(Sem, ID); + case Intrinsic::amdgcn_cubema: + return MA + MA; + case Intrinsic::amdgcn_cubesc: + return SC; + case Intrinsic::amdgcn_cubetc: + return TC; + } +} + static Constant *ConstantFoldScalarCall3(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, @@ -2295,6 +2595,15 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), V); } + case Intrinsic::amdgcn_cubeid: + case Intrinsic::amdgcn_cubema: + case Intrinsic::amdgcn_cubesc: + case Intrinsic::amdgcn_cubetc: { + APFloat V = ConstantFoldAMDGCNCubeIntrinsic( + IntrinsicID, Op1->getValueAPF(), Op2->getValueAPF(), + Op3->getValueAPF()); + return ConstantFP::get(Ty->getContext(), V); + } } } } @@ -2313,8 +2622,8 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, // how rounding should be done, and provide their own folding to be // consistent with rounding. This is the same approach as used by // DAGTypeLegalizer::ExpandIntRes_MULFIX. - APInt Lhs = Op1->getValue(); - APInt Rhs = Op2->getValue(); + const APInt &Lhs = Op1->getValue(); + const APInt &Rhs = Op2->getValue(); unsigned Scale = Op3->getValue().getZExtValue(); unsigned Width = Lhs.getBitWidth(); assert(Scale < Width && "Illegal scale."); @@ -2395,19 +2704,26 @@ static Constant *ConstantFoldVectorCall(StringRef Name, const DataLayout &DL, const TargetLibraryInfo *TLI, const CallBase *Call) { - SmallVector<Constant *, 4> Result(VTy->getNumElements()); + // Do not iterate on scalable vector. The number of elements is unknown at + // compile-time. 
+ if (isa<ScalableVectorType>(VTy)) + return nullptr; + + auto *FVTy = cast<FixedVectorType>(VTy); + + SmallVector<Constant *, 4> Result(FVTy->getNumElements()); SmallVector<Constant *, 4> Lane(Operands.size()); - Type *Ty = VTy->getElementType(); + Type *Ty = FVTy->getElementType(); if (IntrinsicID == Intrinsic::masked_load) { auto *SrcPtr = Operands[0]; auto *Mask = Operands[2]; auto *Passthru = Operands[3]; - Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, VTy, DL); + Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL); SmallVector<Constant *, 32> NewElements; - for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { + for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { auto *MaskElt = Mask->getAggregateElement(I); if (!MaskElt) break; @@ -2433,12 +2749,12 @@ static Constant *ConstantFoldVectorCall(StringRef Name, return nullptr; } } - if (NewElements.size() != VTy->getNumElements()) + if (NewElements.size() != FVTy->getNumElements()) return nullptr; return ConstantVector::get(NewElements); } - for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { + for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { // Some intrinsics use a scalar type for certain arguments. @@ -2470,7 +2786,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name, Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef<Constant *> Operands, const TargetLibraryInfo *TLI) { - if (Call->isNoBuiltin() || Call->isStrictFP()) + if (Call->isNoBuiltin()) return nullptr; if (!F->hasName()) return nullptr; @@ -2520,11 +2836,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, case LibFunc_expf: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) - return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan && - Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-745.0) || Op > APFloat(709.0)); if (OpC->getType()->isFloatTy()) - return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan && - Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f)); break; case LibFunc_exp2l: @@ -2532,11 +2846,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, case LibFunc_exp2f: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) - return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan && - Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0)); if (OpC->getType()->isFloatTy()) - return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan && - Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f)); break; case LibFunc_sinl: @@ -2566,10 +2878,8 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, case LibFunc_acosl: case LibFunc_acos: case LibFunc_acosf: - return Op.compare(APFloat(Op.getSemantics(), "-1")) != - APFloat::cmpLessThan && - Op.compare(APFloat(Op.getSemantics(), "1")) != - APFloat::cmpGreaterThan; + return !(Op < APFloat(Op.getSemantics(), "-1") || + Op > APFloat(Op.getSemantics(), "1")); case LibFunc_sinh: case LibFunc_cosh: @@ -2579,11 +2889,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, case LibFunc_coshl: // FIXME: These boundaries are slightly conservative. 
if (OpC->getType()->isDoubleTy()) - return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan && - Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-710.0) || Op > APFloat(710.0)); if (OpC->getType()->isFloatTy()) - return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan && - Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan; + return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f)); break; case LibFunc_sqrtl: @@ -2626,6 +2934,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, case LibFunc_fmodl: case LibFunc_fmod: case LibFunc_fmodf: + case LibFunc_remainderl: + case LibFunc_remainder: + case LibFunc_remainderf: return Op0.isNaN() || Op1.isNaN() || (!Op0.isInfinity() && !Op1.isZero()); @@ -2637,3 +2948,5 @@ bool llvm::isMathLibCallNoop(const CallBase *Call, return false; } + +void TargetFolder::anchor() {} diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index 90ce13e6f6503..280d9ef79efad 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -16,6 +16,11 @@ using namespace llvm; +static cl::opt<bool> SimplifyDDG( + "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc( + "Simplify DDG by merging nodes that have less interesting edges.")); + static cl::opt<bool> CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Create pi-block nodes.")); @@ -257,10 +262,47 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { return OS; } -bool DDGBuilder::shouldCreatePiBlocks() const { - return CreatePiBlocks; +//===--------------------------------------------------------------------===// +// DDGBuilder implementation +//===--------------------------------------------------------------------===// + +bool DDGBuilder::areNodesMergeable(const DDGNode &Src, + const DDGNode &Tgt) const { + // Only merge two nodes if they are both simple nodes and the consecutive + // instructions after merging belong to the same BB. + const auto *SimpleSrc = dyn_cast<const SimpleDDGNode>(&Src); + const auto *SimpleTgt = dyn_cast<const SimpleDDGNode>(&Tgt); + if (!SimpleSrc || !SimpleTgt) + return false; + + return SimpleSrc->getLastInstruction()->getParent() == + SimpleTgt->getFirstInstruction()->getParent(); } +void DDGBuilder::mergeNodes(DDGNode &A, DDGNode &B) { + DDGEdge &EdgeToFold = A.back(); + assert(A.getEdges().size() == 1 && EdgeToFold.getTargetNode() == B && + "Expected A to have a single edge to B."); + assert(isa<SimpleDDGNode>(&A) && isa<SimpleDDGNode>(&B) && + "Expected simple nodes"); + + // Copy instructions from B to the end of A. + cast<SimpleDDGNode>(&A)->appendInstructions(*cast<SimpleDDGNode>(&B)); + + // Move to A any outgoing edges from B. 
+ for (DDGEdge *BE : B) + Graph.connect(A, BE->getTargetNode(), *BE); + + A.removeEdge(EdgeToFold); + destroyEdge(EdgeToFold); + Graph.removeNode(B); + destroyNode(B); +} + +bool DDGBuilder::shouldSimplify() const { return SimplifyDDG; } + +bool DDGBuilder::shouldCreatePiBlocks() const { return CreatePiBlocks; } + //===--------------------------------------------------------------------===// // DDG Analysis Passes //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 9b38053c196b9..bcfeef7fb8abc 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -647,7 +647,7 @@ void Dependence::dump(raw_ostream &OS) const { // tbaa, non-overlapping regions etc), then it is known there is no dependecy. // Otherwise the underlying objects are checked to see if they point to // different identifiable objects. -static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, +static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL, const MemoryLocation &LocA, const MemoryLocation &LocB) { @@ -3264,23 +3264,134 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, assert(isLoadOrStore(Dst) && "instruction is not load or store"); Value *SrcPtr = getLoadStorePointerOperand(Src); Value *DstPtr = getLoadStorePointerOperand(Dst); - Loop *SrcLoop = LI->getLoopFor(Src->getParent()); Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; - // Below code mimics the code in Delinearization.cpp - const SCEV *SrcAccessFn = - SE->getSCEVAtScope(SrcPtr, SrcLoop); - const SCEV *DstAccessFn = - SE->getSCEVAtScope(DstPtr, DstLoop); + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; + + if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts) && + !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts)) + return false; + + int Size = SrcSubscripts.size(); + LLVM_DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int I = 0; I < Size; I++) + dbgs() << *SrcSubscripts[I]; + dbgs() << "\nDstSubscripts: "; + for (int I = 0; I < Size; I++) + dbgs() << *DstSubscripts[I]; + }); + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization. + Pair.resize(Size); + for (int I = 0; I < Size; ++I) { + Pair[I].Src = SrcSubscripts[I]; + Pair[I].Dst = DstSubscripts[I]; + unifySubscriptType(&Pair[I]); + } + + return true; +} + +bool DependenceInfo::tryDelinearizeFixedSize( + Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, + SmallVectorImpl<const SCEV *> &DstSubscripts) { + + // In general we cannot safely assume that the subscripts recovered from GEPs + // are in the range of values defined for their corresponding array + // dimensions. 
For example some C language usage/interpretation make it + // impossible to verify this at compile-time. As such we give up here unless + // we can assume that the subscripts do not overlap into neighboring + // dimensions and that the number of dimensions matches the number of + // subscripts being recovered. + if (!DisableDelinearizationChecks) + return false; + + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); const SCEVUnknown *SrcBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); const SCEVUnknown *DstBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); - if (!SrcBase || !DstBase || SrcBase != DstBase) + // Check the simple case where the array dimensions are fixed size. + auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); + auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); + if (!SrcGEP || !DstGEP) + return false; + + SmallVector<int, 4> SrcSizes, DstSizes; + SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes); + SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes); + + // Check that the two size arrays are non-empty and equal in length and + // value. + if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || + SrcSizes.size() != DstSizes.size() || + !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { + SrcSubscripts.clear(); + DstSubscripts.clear(); return false; + } + + Value *SrcBasePtr = SrcGEP->getOperand(0); + Value *DstBasePtr = DstGEP->getOperand(0); + while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr)) + SrcBasePtr = PCast->getOperand(0); + while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr)) + DstBasePtr = PCast->getOperand(0); + + // Check that for identical base pointers we do not miss index offsets + // that have been added before this GEP is applied. + if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) { + assert(SrcSubscripts.size() == DstSubscripts.size() && + SrcSubscripts.size() == SrcSizes.size() + 1 && + "Expected equal number of entries in the list of sizes and " + "subscripts."); + LLVM_DEBUG({ + dbgs() << "Delinearized subscripts of fixed-size array\n" + << "SrcGEP:" << *SrcGEP << "\n" + << "DstGEP:" << *DstGEP << "\n"; + }); + return true; + } + + SrcSubscripts.clear(); + DstSubscripts.clear(); + return false; +} + +bool DependenceInfo::tryDelinearizeParametricSize( + Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, + SmallVectorImpl<const SCEV *> &DstSubscripts) { + + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); + const SCEVUnknown *SrcBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); const SCEV *ElementSize = SE->getElementSize(Src); if (ElementSize != SE->getElementSize(Dst)) @@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SE->findArrayDimensions(Terms, Sizes, ElementSize); // Third step: compute the access functions for each subscript. 
- SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); @@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts.size() != DstSubscripts.size()) return false; - int size = SrcSubscripts.size(); + size_t Size = SrcSubscripts.size(); // Statically check that the array bounds are in-range. The first subscript we // don't have a size for and it cannot overflow into another subscript, so is @@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // FIXME: It may be better to record these sizes and add them as constraints // to the dependency checks. if (!DisableDelinearizationChecks) - for (int i = 1; i < size; ++i) { - if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) + for (size_t I = 1; I < Size; ++I) { + if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) return false; - if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) + if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) return false; - if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) + if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) return false; - if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) + if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) return false; } - LLVM_DEBUG({ - dbgs() << "\nSrcSubscripts: "; - for (int i = 0; i < size; i++) - dbgs() << *SrcSubscripts[i]; - dbgs() << "\nDstSubscripts: "; - for (int i = 0; i < size; i++) - dbgs() << *DstSubscripts[i]; - }); - - // The delinearization transforms a single-subscript MIV dependence test into - // a multi-subscript SIV dependence test that is easier to compute. So we - // resize Pair to contain as many pairs of subscripts as the delinearization - // has found, and then initialize the pairs following the delinearization. - Pair.resize(size); - for (int i = 0; i < size; ++i) { - Pair[i].Src = SrcSubscripts[i]; - Pair[i].Dst = DstSubscripts[i]; - unifySubscriptType(&Pair[i]); - } - return true; } diff --git a/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/llvm/lib/Analysis/DependenceGraphBuilder.cpp index e8a1a2fff9195..7a98d844e4cb1 100644 --- a/llvm/lib/Analysis/DependenceGraphBuilder.cpp +++ b/llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -374,6 +375,109 @@ void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() { } } +template <class G> void AbstractDependenceGraphBuilder<G>::simplify() { + if (!shouldSimplify()) + return; + LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n"); + + // This algorithm works by first collecting a set of candidate nodes that have + // an out-degree of one (in terms of def-use edges), and then ignoring those + // whose targets have an in-degree more than one. Each node in the resulting + // set can then be merged with its corresponding target and put back into the + // worklist until no further merge candidates are available. + SmallPtrSet<NodeType *, 32> CandidateSourceNodes; + + // A mapping between nodes and their in-degree. To save space, this map + // only contains nodes that are targets of nodes in the CandidateSourceNodes. 
+ DenseMap<NodeType *, unsigned> TargetInDegreeMap; + + for (NodeType *N : Graph) { + if (N->getEdges().size() != 1) + continue; + EdgeType &Edge = N->back(); + if (!Edge.isDefUse()) + continue; + CandidateSourceNodes.insert(N); + + // Insert an element into the in-degree map and initialize to zero. The + // count will get updated in the next step. + TargetInDegreeMap.insert({&Edge.getTargetNode(), 0}); + } + + LLVM_DEBUG({ + dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size() + << "\nNode with single outgoing def-use edge:\n"; + for (NodeType *N : CandidateSourceNodes) { + dbgs() << N << "\n"; + } + }); + + for (NodeType *N : Graph) { + for (EdgeType *E : *N) { + NodeType *Tgt = &E->getTargetNode(); + auto TgtIT = TargetInDegreeMap.find(Tgt); + if (TgtIT != TargetInDegreeMap.end()) + ++(TgtIT->second); + } + } + + LLVM_DEBUG({ + dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size() + << "\nContent of in-degree map:\n"; + for (auto &I : TargetInDegreeMap) { + dbgs() << I.first << " --> " << I.second << "\n"; + } + }); + + SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(), + CandidateSourceNodes.end()); + while (!Worklist.empty()) { + NodeType &Src = *Worklist.pop_back_val(); + // As nodes get merged, we need to skip any node that has been removed from + // the candidate set (see below). + if (!CandidateSourceNodes.erase(&Src)) + continue; + + assert(Src.getEdges().size() == 1 && + "Expected a single edge from the candidate src node."); + NodeType &Tgt = Src.back().getTargetNode(); + assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() && + "Expected target to be in the in-degree map."); + + if (TargetInDegreeMap[&Tgt] != 1) + continue; + + if (!areNodesMergeable(Src, Tgt)) + continue; + + // Do not merge if there is also an edge from target to src (immediate + // cycle). + if (Tgt.hasEdgeTo(Src)) + continue; + + LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n"); + + mergeNodes(Src, Tgt); + + // If the target node is in the candidate set itself, we need to put the + // src node back into the worklist again so it gives the target a chance + // to get merged into it. For example if we have: + // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, + // then after merging (a) and (b) together, we need to put (a,b) back in + // the worklist so that (c) can get merged in as well resulting in + // {(a,b,c) -> d} + // We also need to remove the old target (b), from the worklist. We first + // remove it from the candidate set here, and skip any item from the + // worklist that is not in the set. 
+ if (CandidateSourceNodes.erase(&Tgt)) { + Worklist.push_back(&Src); + CandidateSourceNodes.insert(&Src); + LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n"); + } + } + LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n"); +} + template <class G> void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 3d1be1e1cce09..343406c9bba16 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -184,6 +184,17 @@ bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const { return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); } +static bool usesLiveOut(const Instruction &I, const Loop *DivLoop) { + for (auto &Op : I.operands()) { + auto *OpInst = dyn_cast<Instruction>(&Op); + if (!OpInst) + continue; + if (DivLoop->contains(OpInst->getParent())) + return true; + } + return false; +} + // marks all users of loop-carried values of the loop headed by LoopHeader as // divergent void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) { @@ -227,16 +238,14 @@ void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) { continue; if (isDivergent(I)) continue; + if (!usesLiveOut(I, DivLoop)) + continue; - for (auto &Op : I.operands()) { - auto *OpInst = dyn_cast<Instruction>(&Op); - if (!OpInst) - continue; - if (DivLoop->contains(OpInst->getParent())) { - markDivergent(I); - pushUsers(I); - break; - } + markDivergent(I); + if (I.isTerminator()) { + propagateBranchDivergence(I); + } else { + pushUsers(I); } } @@ -286,14 +295,11 @@ bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock, // push non-divergent phi nodes in JoinBlock to the worklist pushPHINodes(JoinBlock); - // JoinBlock is a divergent loop exit - if (BranchLoop && !BranchLoop->contains(&JoinBlock)) { - return true; - } - // disjoint-paths divergent at JoinBlock markBlockJoinDivergent(JoinBlock); - return false; + + // JoinBlock is a divergent loop exit + return BranchLoop && !BranchLoop->contains(&JoinBlock); } void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) { @@ -301,6 +307,10 @@ void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) { markDivergent(Term); + // Don't propagate divergence from unreachable blocks. 
+ if (!DT.isReachableFromEntry(Term.getParent())) + return; + const auto *BranchLoop = LI.getLoopFor(Term.getParent()); // whether there is a divergent loop exit from BranchLoop (if any) diff --git a/llvm/lib/Analysis/DomPrinter.cpp b/llvm/lib/Analysis/DomPrinter.cpp index 024a0fb499505..ebbe0d3e2c5fb 100644 --- a/llvm/lib/Analysis/DomPrinter.cpp +++ b/llvm/lib/Analysis/DomPrinter.cpp @@ -40,11 +40,11 @@ struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits { if (isSimple()) - return DOTGraphTraits<const Function*> - ::getSimpleNodeLabel(BB, BB->getParent()); + return DOTGraphTraits<DOTFuncInfo *> + ::getSimpleNodeLabel(BB, nullptr); else - return DOTGraphTraits<const Function*> - ::getCompleteNodeLabel(BB, BB->getParent()); + return DOTGraphTraits<DOTFuncInfo *> + ::getCompleteNodeLabel(BB, nullptr); } }; diff --git a/llvm/lib/Analysis/DomTreeUpdater.cpp b/llvm/lib/Analysis/DomTreeUpdater.cpp index b374334ea3716..9594da0a4f91d 100644 --- a/llvm/lib/Analysis/DomTreeUpdater.cpp +++ b/llvm/lib/Analysis/DomTreeUpdater.cpp @@ -14,7 +14,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/GenericDomTree.h" #include <algorithm> #include <functional> @@ -507,7 +507,7 @@ LLVM_DUMP_METHOD void DomTreeUpdater::dump() const { OS << "Pending DeletedBBs:\n"; Index = 0; - for (auto BB : DeletedBBs) { + for (const auto *BB : DeletedBBs) { OS << " " << Index << " : "; ++Index; if (BB->hasName()) @@ -519,7 +519,7 @@ LLVM_DUMP_METHOD void DomTreeUpdater::dump() const { OS << "Pending Callbacks:\n"; Index = 0; - for (auto BB : Callbacks) { + for (const auto &BB : Callbacks) { OS << " " << Index << " : "; ++Index; if (BB->hasName()) diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index 4361e0dc9bbd5..8c8ccf04ebba2 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -28,6 +29,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" + using namespace llvm; #define DEBUG_TYPE "globalsmodref-aa" @@ -77,7 +79,7 @@ class GlobalsAAResult::FunctionInfo { static inline AlignedMap *getFromVoidPointer(void *P) { return (AlignedMap *)P; } - enum { NumLowBitsAvailable = 3 }; + static constexpr int NumLowBitsAvailable = 3; static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable), "AlignedMap insufficiently aligned to have enough low bits."); }; @@ -810,6 +812,14 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, return true; } +bool GlobalsAAResult::invalidate(Module &, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + // Check whether the analysis has been explicitly invalidated. Otherwise, it's + // stateless and remains preserved. + auto PAC = PA.getChecker<GlobalsAA>(); + return !PAC.preservedWhenStateless(); +} + /// alias - If one of the pointers is to a global that we are tracking, and the /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. 
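For context on the GlobalsAAResult::invalidate hook added above: under the new pass manager, GlobalsAA now survives any pass that does not explicitly abandon it. Below is a minimal sketch of a module pass that keeps the result alive; the pass itself is hypothetical and assumes the usual new-pass-manager boilerplate, only the PreservedAnalyses usage is the point:

#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Hypothetical no-op module pass. Because it preserves GlobalsAA (or,
// equivalently, returns PreservedAnalyses::all()), invalidate() above
// returns false and the cached GlobalsAAResult is reused.
struct KeepGlobalsAAPass : PassInfoMixin<KeepGlobalsAAPass> {
  PreservedAnalyses run(Module &, ModuleAnalysisManager &) {
    PreservedAnalyses PA;
    PA.preserve<GlobalsAA>();
    return PA;
  }
};

Returning PreservedAnalyses::none() instead would abandon the analysis, and invalidate() would then report the result as stale.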
diff --git a/llvm/lib/Analysis/GuardUtils.cpp b/llvm/lib/Analysis/GuardUtils.cpp index d482832798581..cd132c56991f8 100644 --- a/llvm/lib/Analysis/GuardUtils.cpp +++ b/llvm/lib/Analysis/GuardUtils.cpp @@ -47,7 +47,7 @@ bool llvm::parseWidenableBranch(const User *U, Value *&Condition, Use *C, *WC; if (parseWidenableBranch(const_cast<User*>(U), C, WC, IfTrueBB, IfFalseBB)) { - if (C) + if (C) Condition = C->get(); else Condition = ConstantInt::getTrue(IfTrueBB->getContext()); @@ -66,10 +66,10 @@ bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC, auto *Cond = BI->getCondition(); if (!Cond->hasOneUse()) return false; - + IfTrueBB = BI->getSuccessor(0); IfFalseBB = BI->getSuccessor(1); - + if (match(Cond, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) { WC = &BI->getOperandUse(0); C = nullptr; @@ -88,7 +88,7 @@ bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC, if (!And) // Could be a constexpr return false; - + if (match(A, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) && A->hasOneUse()) { WC = &And->getOperandUse(0); diff --git a/llvm/lib/Analysis/HeatUtils.cpp b/llvm/lib/Analysis/HeatUtils.cpp new file mode 100644 index 0000000000000..a1a11be5fee3d --- /dev/null +++ b/llvm/lib/Analysis/HeatUtils.cpp @@ -0,0 +1,78 @@ +//===-- HeatUtils.cpp - Utility for printing heat colors --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utility for printing heat colors based on heuristics or profiling +// information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/HeatUtils.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { + +static const unsigned heatSize = 100; +static const char heatPalette[heatSize][8] = { + "#3d50c3", "#4055c8", "#4358cb", "#465ecf", "#4961d2", "#4c66d6", "#4f69d9", + "#536edd", "#5572df", "#5977e3", "#5b7ae5", "#5f7fe8", "#6282ea", "#6687ed", + "#6a8bef", "#6c8ff1", "#7093f3", "#7396f5", "#779af7", "#7a9df8", "#7ea1fa", + "#81a4fb", "#85a8fc", "#88abfd", "#8caffe", "#8fb1fe", "#93b5fe", "#96b7ff", + "#9abbff", "#9ebeff", "#a1c0ff", "#a5c3fe", "#a7c5fe", "#abc8fd", "#aec9fc", + "#b2ccfb", "#b5cdfa", "#b9d0f9", "#bbd1f8", "#bfd3f6", "#c1d4f4", "#c5d6f2", + "#c7d7f0", "#cbd8ee", "#cedaeb", "#d1dae9", "#d4dbe6", "#d6dce4", "#d9dce1", + "#dbdcde", "#dedcdb", "#e0dbd8", "#e3d9d3", "#e5d8d1", "#e8d6cc", "#ead5c9", + "#ecd3c5", "#eed0c0", "#efcebd", "#f1ccb8", "#f2cab5", "#f3c7b1", "#f4c5ad", + "#f5c1a9", "#f6bfa6", "#f7bca1", "#f7b99e", "#f7b599", "#f7b396", "#f7af91", + "#f7ac8e", "#f7a889", "#f6a385", "#f5a081", "#f59c7d", "#f4987a", "#f39475", + "#f29072", "#f08b6e", "#ef886b", "#ed8366", "#ec7f63", "#e97a5f", "#e8765c", + "#e57058", "#e36c55", "#e16751", "#de614d", "#dc5d4a", "#d85646", "#d65244", + "#d24b40", "#d0473d", "#cc403a", "#ca3b37", "#c53334", "#c32e31", "#be242e", + "#bb1b2c", "#b70d28"}; + +uint64_t +getNumOfCalls(Function &callerFunction, Function &calledFunction) { + uint64_t counter = 0; + for (User *U : calledFunction.users()) { + if (auto CI = dyn_cast<CallInst>(U)) { + if (CI->getCaller() == (&callerFunction)) { + counter += 1; + } + } + } + return counter; +} + +uint64_t getMaxFreq(const Function &F, const BlockFrequencyInfo *BFI) { + uint64_t maxFreq = 0; + for (const BasicBlock 
&BB : F) { + uint64_t freqVal = BFI->getBlockFreq(&BB).getFrequency(); + if (freqVal >= maxFreq) + maxFreq = freqVal; + } + return maxFreq; +} + +std::string getHeatColor(uint64_t freq, uint64_t maxFreq) { + if (freq > maxFreq) + freq = maxFreq; + double percent = (freq > 0) ? log2(double(freq)) / log2(maxFreq) : 0; + return getHeatColor(percent); +} + +std::string getHeatColor(double percent) { + if (percent > 1.0) + percent = 1.0; + if (percent < 0.0) + percent = 0.0; + unsigned colorId = unsigned(round(percent * (heatSize - 1.0))); + return heatPalette[colorId]; +} + +} // namespace llvm diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index ac81cba836f89..6686848d75c91 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index dc4cbc371ef43..c32aa0340cebe 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/IndirectCallVisitor.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp new file mode 100644 index 0000000000000..74a536d1ce2f4 --- /dev/null +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -0,0 +1,408 @@ +//===- InlineAdvisor.cpp - analysis pass implementation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements InlineAdvisorAnalysis and DefaultInlineAdvisor, and +// related types. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include <sstream> + +using namespace llvm; +#define DEBUG_TYPE "inline" + +// This weirdly named statistic tracks the number of times that, when attempting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps. +STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + +/// Flag to add inline messages as callsite attributes 'inline-remark'. 
+static cl::opt<bool> + InlineRemarkAttribute("inline-remark-attribute", cl::init(false), + cl::Hidden, + cl::desc("Enable adding inline-remark attribute to" + " callsites processed by inliner but decided" + " to be not inlined")); + +// An integer used to limit the cost of inline deferral. The default negative +// number tells shouldBeDeferred to only take the secondary cost into account. +static cl::opt<int> + InlineDeferralScale("inline-deferral-scale", + cl::desc("Scale to limit the cost of inline deferral"), + cl::init(2), cl::Hidden); + +namespace { +class DefaultInlineAdvice : public InlineAdvice { +public: + DefaultInlineAdvice(DefaultInlineAdvisor *Advisor, CallBase &CB, + Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE) + : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB), + OIC(OIC) {} + +private: + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { + using namespace ore; + llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + + "; " + inlineCostStr(*OIC)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller) << ": " + << NV("Reason", Result.getFailureReason()); + }); + } + + void recordInliningWithCalleeDeletedImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + + void recordInliningImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + +private: + CallBase *const OriginalCB; + Optional<InlineCost> OIC; +}; + +} // namespace + +llvm::Optional<llvm::InlineCost> +getDefaultInlineAdvice(CallBase &CB, FunctionAnalysisManager &FAM, + const InlineParams &Params) { + Function &Caller = *CB.getCaller(); + ProfileSummaryInfo *PSI = + FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller) + .getCachedResult<ProfileSummaryAnalysis>( + *CB.getParent()->getParent()->getParent()); + + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + + auto GetInlineCost = [&](CallBase &CB) { + Function &Callee = *CB.getCalledFunction(); + auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); + bool RemarksEnabled = + Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( + DEBUG_TYPE); + return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI, + GetBFI, PSI, RemarksEnabled ? 
&ORE : nullptr); + }; + return llvm::shouldInline(CB, GetInlineCost, ORE, + Params.EnableDeferral.hasValue() && + Params.EnableDeferral.getValue()); +} + +std::unique_ptr<InlineAdvice> DefaultInlineAdvisor::getAdvice(CallBase &CB) { + auto OIC = getDefaultInlineAdvice(CB, FAM, Params); + return std::make_unique<DefaultInlineAdvice>( + this, CB, OIC, + FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller())); +} + +InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, + OptimizationRemarkEmitter &ORE, + bool IsInliningRecommended) + : Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()), + DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE), + IsInliningRecommended(IsInliningRecommended) {} + +void InlineAdvisor::markFunctionAsDeleted(Function *F) { + assert((!DeletedFunctions.count(F)) && + "Cannot put cause a function to become dead twice!"); + DeletedFunctions.insert(F); +} + +void InlineAdvisor::freeDeletedFunctions() { + for (auto *F : DeletedFunctions) + delete F; + DeletedFunctions.clear(); +} + +void InlineAdvice::recordInliningWithCalleeDeleted() { + markRecorded(); + Advisor->markFunctionAsDeleted(Callee); + recordInliningWithCalleeDeletedImpl(); +} + +AnalysisKey InlineAdvisorAnalysis::Key; + +bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params, + InliningAdvisorMode Mode) { + auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + switch (Mode) { + case InliningAdvisorMode::Default: + Advisor.reset(new DefaultInlineAdvisor(FAM, Params)); + break; + case InliningAdvisorMode::Development: + // To be added subsequently under conditional compilation. + break; + case InliningAdvisorMode::Release: +#ifdef LLVM_HAVE_TF_AOT + Advisor = llvm::getReleaseModeAdvisor(M, MAM); +#endif + break; + } + return !!Advisor; +} + +/// Return true if inlining of CB can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CB. +/// \p TotalSecondaryCost will be set to the estimated cost of inlining the +/// caller if \p CB is suppressed for inlining. +static bool +shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, + function_ref<InlineCost(CallBase &CB)> GetInlineCost) { + // For now we only handle local or inline functions. + if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) + return false; + // If the cost of inlining CB is non-positive, it is not going to prevent the + // caller from being inlined into its callers and hence we don't need to + // defer. + if (IC.getCost() <= 0) + return false; + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. 
It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - 1; + // If the caller has local linkage and can be inlined to all its callers, we + // can apply a huge negative bonus to TotalSecondaryCost. + bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); + // This bool tracks what happens if we DO inline C into B. + bool InliningPreventsSomeOuterInline = false; + unsigned NumCallerUsers = 0; + for (User *U : Caller->users()) { + CallBase *CS2 = dyn_cast<CallBase>(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2->getCalledFunction() != Caller) { + ApplyLastCallBonus = false; + continue; + } + + InlineCost IC2 = GetInlineCost(*CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + ApplyLastCallBonus = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining of the original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { + InliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + NumCallerUsers++; + } + } + + if (!InliningPreventsSomeOuterInline) + return false; + + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. + if (ApplyLastCallBonus) + TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; + + // If InlineDeferralScale is negative, then ignore the cost of primary + // inlining -- IC.getCost() multiplied by the number of callers to Caller. + if (InlineDeferralScale < 0) + return TotalSecondaryCost < IC.getCost(); + + int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; + int Allowance = IC.getCost() * InlineDeferralScale; + return TotalCost < Allowance; +} + +namespace llvm { +static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R, + const ore::NV &Arg) { + return R << Arg.Val; +} + +template <class RemarkT> +RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { + using namespace ore; + if (IC.isAlways()) { + R << "(cost=always)"; + } else if (IC.isNever()) { + R << "(cost=never)"; + } else { + R << "(cost=" << ore::NV("Cost", IC.getCost()) + << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; + } + if (const char *Reason = IC.getReason()) + R << ": " << ore::NV("Reason", Reason); + return R; +} +} // namespace llvm + +std::string llvm::inlineCostStr(const InlineCost &IC) { + std::stringstream Remark; + Remark << IC; + return Remark.str(); +} + +void llvm::setInlineRemark(CallBase &CB, StringRef Message) { + if (!InlineRemarkAttribute) + return; + + Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); + CB.addAttribute(AttributeList::FunctionIndex, Attr); +} + +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. 
+Optional<InlineCost> +llvm::shouldInline(CallBase &CB, + function_ref<InlineCost(CallBase &CB)> GetInlineCost, + OptimizationRemarkEmitter &ORE, bool EnableDeferral) { + using namespace ore; + + InlineCost IC = GetInlineCost(CB); + Instruction *Call = &CB; + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); + + if (IC.isAlways()) { + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + return IC; + } + + if (!IC) { + LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + if (IC.isNever()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because it should never be inlined " + << IC; + }); + } else { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because too costly to inline " + << IC; + }); + } + setInlineRemark(CB, inlineCostStr(IC)); + return None; + } + + int TotalSecondaryCost = 0; + if (EnableDeferral && + shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { + LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + Call) + << "Not inlining. Cost of inlining " << NV("Callee", Callee) + << " increases the cost of inlining " << NV("Caller", Caller) + << " in other contexts"; + }); + setInlineRemark(CB, "deferred"); + // IC does not bool() to false, so get an InlineCost that will. + // This will not be inspected to make an error message. + return None; + } + + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB + << '\n'); + return IC; +} + +void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { + if (!DLoc.get()) + return; + + bool First = true; + Remark << " at callsite "; + for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { + if (!First) + Remark << " @ "; + unsigned int Offset = DIL->getLine(); + Offset -= DIL->getScope()->getSubprogram()->getLine(); + unsigned int Discriminator = DIL->getBaseDiscriminator(); + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + Remark << Name << ":" << ore::NV("Line", Offset); + if (Discriminator) + Remark << "." << ore::NV("Disc", Discriminator); + First = false; + } +} + +void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC, + bool ForProfileContext, const char *PassName) { + ORE.emit([&]() { + bool AlwaysInline = IC.isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + OptimizationRemark Remark(PassName ? 
PassName : DEBUG_TYPE, RemarkName, + DLoc, Block); + Remark << ore::NV("Callee", &Callee) << " inlined into "; + Remark << ore::NV("Caller", &Caller); + if (ForProfileContext) + Remark << " to match profiling context"; + Remark << " with " << IC; + addLocationToRemarks(Remark, DLoc); + return Remark; + }); +} diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index de83a48aad16a..33d714406d7f2 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -24,9 +24,11 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -38,6 +40,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -46,6 +49,15 @@ using namespace llvm; STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); +static cl::opt<int> + DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), + cl::ZeroOrMore, + cl::desc("Default amount of inlining to perform")); + +static cl::opt<bool> PrintInstructionComments( + "print-instruction-comments", cl::Hidden, cl::init(false), + cl::desc("Prints comments for instruction based on inline cost analysis")); + static cl::opt<int> InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -92,8 +104,52 @@ static cl::opt<bool> OptComputeFullInlineCost( cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold.")); +static cl::opt<bool> InlineCallerSupersetNoBuiltin( + "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), + cl::ZeroOrMore, + cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " + "attributes.")); + +static cl::opt<bool> DisableGEPConstOperand( + "disable-gep-const-evaluation", cl::Hidden, cl::init(false), + cl::desc("Disables evaluation of GetElementPtr with constant operands")); + namespace { class InlineCostCallAnalyzer; + +// This struct is used to store information about inline cost of a +// particular instruction +struct InstructionCostDetail { + int CostBefore = 0; + int CostAfter = 0; + int ThresholdBefore = 0; + int ThresholdAfter = 0; + + int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; } + + int getCostDelta() const { return CostAfter - CostBefore; } + + bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } +}; + +class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { +private: + InlineCostCallAnalyzer *const ICCA; + +public: + InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override; +}; + +/// Carry out call site analysis, in order to evaluate inlinability. +/// NOTE: the type is currently used as implementation detail of functions such +/// as llvm::getInlineCost. Note the function_ref constructor parameters - the +/// expectation is that they come from the outer scope, from the wrapper +/// functions. 
If we want to support constructing CallAnalyzer objects where +/// lambdas are provided inline at construction, or where the object needs to +/// otherwise survive past the scope of the provided functions, we need to +/// revisit the argument types. class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; @@ -104,10 +160,10 @@ protected: const TargetTransformInfo &TTI; /// Getter for the cache of @llvm.assume intrinsics. - std::function<AssumptionCache &(Function &)> &GetAssumptionCache; + function_ref<AssumptionCache &(Function &)> GetAssumptionCache; /// Getter for BlockFrequencyInfo - Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI; + function_ref<BlockFrequencyInfo &(Function &)> GetBFI; /// Profile summary information. ProfileSummaryInfo *PSI; @@ -130,11 +186,16 @@ protected: /// Called after a basic block was analyzed. virtual void onBlockAnalyzed(const BasicBlock *BB) {} + /// Called before an instruction was analyzed + virtual void onInstructionAnalysisStart(const Instruction *I) {} + + /// Called after an instruction was analyzed + virtual void onInstructionAnalysisFinish(const Instruction *I) {} + /// Called at the end of the analysis of the callsite. Return the outcome of /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or /// the reason it can't. - virtual InlineResult finalizeAnalysis() { return true; } - + virtual InlineResult finalizeAnalysis() { return InlineResult::success(); } /// Called when we're about to start processing a basic block, and every time /// we are done processing an instruction. Return true if there is no point in /// continuing the analysis (e.g. we've determined already the call site is @@ -145,8 +206,7 @@ protected: /// contexts propagated). It checks callsite-specific information. Return a /// reason analysis can't continue if that's the case, or 'true' if it may /// continue. - virtual InlineResult onAnalysisStart() { return true; } - + virtual InlineResult onAnalysisStart() { return InlineResult::success(); } /// Called if the analysis engine decides SROA cannot be done for the given /// alloca. virtual void onDisableSROA(AllocaInst *Arg) {} @@ -187,7 +247,7 @@ protected: /// Called to account for any other instruction not specifically accounted /// for. - virtual void onCommonInstructionSimplification() {} + virtual void onMissedSimplification() {} /// Start accounting potential benefits due to SROA for the given alloca. virtual void onInitializeSROAArg(AllocaInst *Arg) {} @@ -236,9 +296,7 @@ protected: DenseMap<Value *, AllocaInst *> SROAArgValues; /// Keep track of Allocas for which we believe we may get SROA optimization. - /// We don't delete entries in SROAArgValue because we still want - /// isAllocaDerivedArg to function correctly. - DenseSet<AllocaInst *> EnabledSROAArgValues; + DenseSet<AllocaInst *> EnabledSROAAllocas; /// Keep track of values which map to a pointer base and constant offset. 
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; @@ -258,8 +316,7 @@ protected: AllocaInst *getSROAArgForValueOrNull(Value *V) const { auto It = SROAArgValues.find(V); - if (It == SROAArgValues.end() || - EnabledSROAArgValues.count(It->second) == 0) + if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0) return nullptr; return It->second; } @@ -337,17 +394,24 @@ protected: bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const TargetTransformInfo &TTI, - std::function<AssumptionCache &(Function &)> &GetAssumptionCache, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallBase &Call) + CallAnalyzer( + Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call), EnableLoadElimination(true) {} InlineResult analyze(); + Optional<Constant*> getSimplifiedValue(Instruction *I) { + if (SimplifiedValues.find(I) != SimplifiedValues.end()) + return SimplifiedValues[I]; + return None; + } + // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. unsigned NumConstantArgs = 0; @@ -375,6 +439,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// Tunable parameters that control the analysis. const InlineParams &Params; + // This DenseMap stores the delta change in cost and threshold after + // accounting for the given instruction. The map is filled only with the + // flag PrintInstructionComments on. + DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap; + /// Upper bound for the inlining cost. Bonuses are being applied to account /// for speculative "expected profit" of the inlining decision. int Threshold = 0; @@ -382,6 +451,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// Attempt to evaluate indirect calls to boost its inline cost. const bool BoostIndirectCalls; + /// Ignore the threshold when finalizing analysis. + const bool IgnoreThreshold; + /// Inlining cost measured in abstract units, accounts for all the /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site @@ -456,9 +528,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { InlineConstants::IndirectCallThreshold; /// FIXME: if InlineCostCallAnalyzer is derived from, this may need /// to instantiate the derived class. - InlineCostCallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, - Call, IndirectCallParams, false); - if (CA.analyze()) { + InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, + GetAssumptionCache, GetBFI, PSI, ORE, false); + if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. 
Cost -= std::max(0, CA.getThreshold() - CA.getCost()); @@ -507,7 +579,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { addCost(SwitchCost, (int64_t)CostUpperBound); } - void onCommonInstructionSimplification() override { + void onMissedSimplification() override { addCost(InlineConstants::InstrCost); } @@ -515,7 +587,6 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { assert(Arg != nullptr && "Should not initialize SROA costs for null value."); SROAArgCosts[Arg] = 0; - EnabledSROAArgValues.insert(Arg); } void onAggregateSROAUse(AllocaInst *SROAArg) override { @@ -538,6 +609,25 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { SingleBB = false; } } + + void onInstructionAnalysisStart(const Instruction *I) override { + // This function is called to store the initial cost of inlining before + // the given instruction was assessed. + if (!PrintInstructionComments) + return; + InstructionCostDetailMap[I].CostBefore = Cost; + InstructionCostDetailMap[I].ThresholdBefore = Threshold; + } + + void onInstructionAnalysisFinish(const Instruction *I) override { + // This function is called to find new values of cost and threshold after + // the instruction has been assessed. + if (!PrintInstructionComments) + return; + InstructionCostDetailMap[I].CostAfter = Cost; + InstructionCostDetailMap[I].ThresholdAfter = Threshold; + } + InlineResult finalizeAnalysis() override { // Loops generally act a lot like calls in that they act like barriers to // movement, require a certain amount of setup, etc. So when optimising for @@ -566,12 +656,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= VectorBonus / 2; - return Cost < std::max(1, Threshold); + if (IgnoreThreshold || Cost < std::max(1, Threshold)) + return InlineResult::success(); + return InlineResult::failure("Cost over threshold."); } bool shouldStop() override { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - return Cost >= Threshold && !ComputeFullInlineCost; + return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost; } void onLoadEliminationOpportunity() override { @@ -618,25 +710,42 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // Check if we're done. This can happen due to bonuses and penalties. 
if (Cost >= Threshold && !ComputeFullInlineCost) - return "high cost"; + return InlineResult::failure("high cost"); - return true; + return InlineResult::success(); } public: InlineCostCallAnalyzer( + Function &Callee, CallBase &Call, const InlineParams &Params, const TargetTransformInfo &TTI, - std::function<AssumptionCache &(Function &)> &GetAssumptionCache, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, - ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, - CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) - : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, + bool IgnoreThreshold = false) + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), Params(Params), Threshold(Params.DefaultThreshold), - BoostIndirectCalls(BoostIndirect) {} + BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), + Writer(this) {} + + /// Annotation Writer for instruction details + InlineCostAnnotationWriter Writer; + void dump(); + // Prints the same analysis as dump(), but its definition is not dependent + // on the build. + void print(); + + Optional<InstructionCostDetail> getCostDetails(const Instruction *I) { + if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end()) + return InstructionCostDetailMap[I]; + return None; + } + virtual ~InlineCostCallAnalyzer() {} int getThreshold() { return Threshold; } int getCost() { return Cost; } @@ -650,9 +759,35 @@ bool CallAnalyzer::isAllocaDerivedArg(Value *V) { void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { onDisableSROA(SROAArg); - EnabledSROAArgValues.erase(SROAArg); + EnabledSROAAllocas.erase(SROAArg); disableLoadElimination(); } + +void InlineCostAnnotationWriter::emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + // The cost of inlining of the given instruction is printed always. + // The threshold delta is printed only when it is non-zero. It happens + // when we decided to give a bonus at a particular instruction. + Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I); + if (!Record) + OS << "; No analysis for the instruction"; + else { + OS << "; cost before = " << Record->CostBefore + << ", cost after = " << Record->CostAfter + << ", threshold before = " << Record->ThresholdBefore + << ", threshold after = " << Record->ThresholdAfter << ", "; + OS << "cost delta = " << Record->getCostDelta(); + if (Record->hasThresholdChanged()) + OS << ", threshold delta = " << Record->getThresholdDelta(); + } + auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I)); + if (C) { + OS << ", simplified to "; + C.getValue()->print(OS, true); + } + OS << "\n"; +} + /// If 'V' maps to a SROA candidate, disable SROA for it. 
void CallAnalyzer::disableSROA(Value *V) { if (auto *SROAArg = getSROAArgForValueOrNull(V)) { @@ -711,7 +846,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(SimpleOp); else Operands.push_back(*I); - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&GEP, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitAlloca(AllocaInst &I) { @@ -720,10 +857,22 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (I.isArrayAllocation()) { Constant *Size = SimplifiedValues.lookup(I.getArraySize()); if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { + // Sometimes a dynamic alloca could be converted into a static alloca + // after this constant prop, and become a huge static alloca on an + // unconditional CFG path. Avoid inlining if this is going to happen above + // a threshold. + // FIXME: If the threshold is removed or lowered too much, we could end up + // being too pessimistic and prevent inlining non-problematic code. This + // could result in unintended perf regressions. A better overall strategy + // is needed to track stack usage during inlining. Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), AllocatedSize); + if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) { + HasDynamicAlloca = true; + return false; + } return Base::visitAlloca(I); } } @@ -874,6 +1023,16 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { return true; }; + if (!DisableGEPConstOperand) + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + SmallVector<Constant *, 2> Indices; + for (unsigned int Index = 1 ; Index < COps.size() ; ++Index) + Indices.push_back(COps[Index]); + return ConstantExpr::getGetElementPtr(I.getSourceElementType(), COps[0], + Indices, I.isInBounds()); + })) + return true; + if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { if (SROAArg) SROAArgValues[&I] = SROAArg; @@ -959,7 +1118,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { @@ -983,7 +1143,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitCastInst(CastInst &I) { @@ -993,7 +1154,8 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { })) return true; - // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + // Disable SROA in the face of arbitrary casts we don't explicitly list + // elsewhere. 
disableSROA(I.getOperand(0)); // If this is a floating-point cast, and the target says this operation @@ -1013,7 +1175,8 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { break; } - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { @@ -1085,7 +1248,7 @@ bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) - return PSI->isColdCallSite(CallSite(&Call), CallerBFI); + return PSI->isColdCallSite(Call, CallerBFI); // Otherwise we need BFI to be available. if (!CallerBFI) @@ -1109,8 +1272,7 @@ InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, // If global profile summary is available, then callsite's hotness is // determined based on that. - if (PSI && PSI->hasProfileSummary() && - PSI->isHotCallSite(CallSite(&Call), CallerBFI)) + if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI)) return Params.HotCallSiteThreshold; // Otherwise we need BFI to be available and to have a locally hot callsite @@ -1200,7 +1362,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // Callsite hotness and coldness can be determined if sample profile is // used (which adds hotness metadata to calls) or if caller's // BlockFrequencyInfo is available. - BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; + BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr; auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); if (!Caller->hasOptSize() && HotCallSiteThreshold) { LLVM_DEBUG(dbgs() << "Hot callsite.\n"); @@ -1667,7 +1829,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // does not (yet) fire. unsigned JumpTableSize = 0; - BlockFrequencyInfo *BFI = GetBFI ? &((*GetBFI)(F)) : nullptr; + BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; unsigned NumCaseCluster = TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); @@ -1716,7 +1878,8 @@ bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. - if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + if (TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency)) return true; // We found something we don't understand or can't handle. Mark any SROA-able @@ -1761,33 +1924,36 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. + onInstructionAnalysisStart(&*I); + if (Base::visit(&*I)) ++NumInstructionsSimplified; else - onCommonInstructionSimplification(); + onMissedSimplification(); + onInstructionAnalysisFinish(&*I); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. 
- InlineResult IR; + InlineResult IR = InlineResult::success(); if (IsRecursiveCall) - IR = "recursive"; + IR = InlineResult::failure("recursive"); else if (ExposesReturnsTwice) - IR = "exposes returns twice"; + IR = InlineResult::failure("exposes returns twice"); else if (HasDynamicAlloca) - IR = "dynamic alloca"; + IR = InlineResult::failure("dynamic alloca"); else if (HasIndirectBr) - IR = "indirect branch"; + IR = InlineResult::failure("indirect branch"); else if (HasUninlineableIntrinsic) - IR = "uninlinable intrinsic"; + IR = InlineResult::failure("uninlinable intrinsic"); else if (InitsVargArgs) - IR = "varargs"; - if (!IR) { + IR = InlineResult::failure("varargs"); + if (!IR.isSuccess()) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) << NV("Callee", &F) << " has uninlinable pattern (" - << NV("InlineResult", IR.message) + << NV("InlineResult", IR.getFailureReason()) << ") and cost is not fully computed"; }); return IR; @@ -1798,22 +1964,25 @@ // the caller stack usage dramatically. if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { - InlineResult IR = "recursive and allocates too much stack space"; + auto IR = + InlineResult::failure("recursive and allocates too much stack space"); if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) - << NV("Callee", &F) << " is " << NV("InlineResult", IR.message) + << NV("Callee", &F) << " is " + << NV("InlineResult", IR.getFailureReason()) << ". Cost is not fully computed"; }); return IR; } if (shouldStop()) - return false; + return InlineResult::failure( + "Call site analysis is not favorable to inlining."); } - return true; + return InlineResult::success(); } /// Compute the base pointer and cumulative constant offsets for V. @@ -1904,11 +2073,11 @@ InlineResult CallAnalyzer::analyze() { ++NumCallsAnalyzed; auto Result = onAnalysisStart(); - if (!Result) + if (!Result.isSuccess()) return Result; if (F.empty()) - return true; + return InlineResult::success(); Function *Caller = CandidateCall.getFunction(); // Check if the caller function is recursive itself. @@ -1937,6 +2106,7 @@ if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { SROAArgValues[&*FAI] = SROAArg; onInitializeSROAArg(SROAArg); + EnabledSROAAllocas.insert(SROAArg); } } } @@ -1983,12 +2153,12 @@ if (BB->hasAddressTaken()) for (User *U : BlockAddress::get(&*BB)->users()) if (!isa<CallBrInst>(*U)) - return "blockaddress used outside of callbr"; + return InlineResult::failure("blockaddress used outside of callbr"); // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail out. InlineResult IR = analyzeBlock(BB, EphValues); - if (!IR) + if (!IR.isSuccess()) return IR; Instruction *TI = BB->getTerminator(); @@ -2034,15 +2204,15 @@ // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) - return "noduplicate"; + return InlineResult::failure("noduplicate"); return finalizeAnalysis(); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// Dump stats about this call's analysis.
-LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { +void InlineCostCallAnalyzer::print() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" + if (PrintInstructionComments) + F.print(dbgs(), &Writer); DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); DEBUG_PRINT_STAT(NumAllocaArgs); @@ -2058,14 +2228,27 @@ LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { DEBUG_PRINT_STAT(Threshold); #undef DEBUG_PRINT_STAT } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// Dump stats about this call's analysis. +LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { + print(); +} #endif /// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. -static bool functionsHaveCompatibleAttributes(Function *Caller, - Function *Callee, - TargetTransformInfo &TTI) { +static bool functionsHaveCompatibleAttributes( + Function *Caller, Function *Callee, TargetTransformInfo &TTI, + function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) { + // Note that CalleeTLI must be a copy not a reference. The legacy pass manager + // caches the most recently created TLI in the TargetLibraryInfoWrapperPass + // object, and always returns the same object (which is overwritten on each + // GetTLI call). Therefore we copy the first result. + auto CalleeTLI = GetTLI(*Callee); return TTI.areInlineCompatible(Caller, Callee) && + GetTLI(*Caller).areInlineCompatible(CalleeTLI, + InlineCallerSupersetNoBuiltin) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); } @@ -2104,23 +2287,46 @@ int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) { InlineCost llvm::getInlineCost( CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, - std::function<AssumptionCache &(Function &)> &GetAssumptionCache, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + GetAssumptionCache, GetTLI, GetBFI, PSI, ORE); } -InlineCost llvm::getInlineCost( - CallBase &Call, Function *Callee, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, - std::function<AssumptionCache &(Function &)> &GetAssumptionCache, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, +Optional<int> llvm::getInliningCostEstimate( + CallBase &Call, TargetTransformInfo &CalleeTTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + const InlineParams Params = {/* DefaultThreshold*/ 0, + /*HintThreshold*/ {}, + /*ColdThreshold*/ {}, + /*OptSizeThreshold*/ {}, + /*OptMinSizeThreshold*/ {}, + /*HotCallSiteThreshold*/ {}, + /*LocallyHotCallSiteThreshold*/ {}, + /*ColdCallSiteThreshold*/ {}, + /*ComputeFullInlineCost*/ true, + /*EnableDeferral*/ true}; + + InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, + GetAssumptionCache, GetBFI, PSI, ORE, true, + /*IgnoreThreshold*/ true); + auto R = CA.analyze(); + if (!R.isSuccess()) + return None; + return CA.getCost(); +} + +Optional<InlineResult> llvm::getAttributeBasedInliningDecision( + CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, + 
function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { // Cannot inline indirect calls. if (!Callee) - return llvm::InlineCost::getNever("indirect call"); + return InlineResult::failure("indirect call"); // Never inline calls with byval arguments that do not have the alloca // address space. Since byval arguments can be replaced with a copy to an @@ -2132,59 +2338,80 @@ InlineCost llvm::getInlineCost( if (Call.isByValArgument(I)) { PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); if (PTy->getAddressSpace() != AllocaAS) - return llvm::InlineCost::getNever("byval arguments without alloca" - " address space"); + return InlineResult::failure("byval arguments without alloca" + " address space"); } // Calls to functions with always-inline attributes should be inlined // whenever possible. if (Call.hasFnAttr(Attribute::AlwaysInline)) { auto IsViable = isInlineViable(*Callee); - if (IsViable) - return llvm::InlineCost::getAlways("always inline attribute"); - return llvm::InlineCost::getNever(IsViable.message); + if (IsViable.isSuccess()) + return InlineResult::success(); + return InlineResult::failure(IsViable.getFailureReason()); } // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). Function *Caller = Call.getCaller(); - if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI)) - return llvm::InlineCost::getNever("conflicting attributes"); + if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) - return InlineResult::failure("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. if (Caller->hasOptNone()) - return llvm::InlineCost::getNever("optnone attribute"); + return InlineResult::failure("optnone attribute"); // Don't inline a function that treats null pointer as valid into a caller // that does not have this attribute. if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) - return llvm::InlineCost::getNever("nullptr definitions incompatible"); + return InlineResult::failure("nullptr definitions incompatible"); // Don't inline functions which can be interposed at link-time. if (Callee->isInterposable()) - return llvm::InlineCost::getNever("interposable"); + return InlineResult::failure("interposable"); // Don't inline functions marked noinline. if (Callee->hasFnAttribute(Attribute::NoInline)) - return llvm::InlineCost::getNever("noinline function attribute"); + return InlineResult::failure("noinline function attribute"); // Don't inline call sites marked noinline. if (Call.isNoInline()) - return llvm::InlineCost::getNever("noinline call site attribute"); + return InlineResult::failure("noinline call site attribute"); + + return None; +} + +InlineCost llvm::getInlineCost( + CallBase &Call, Function *Callee, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + + auto UserDecision = + llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); + + if (UserDecision.hasValue()) { + if (UserDecision->isSuccess()) + return llvm::InlineCost::getAlways("always inline attribute"); + return llvm::InlineCost::getNever(UserDecision->getFailureReason()); + } LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...
(caller:" << Caller->getName() << ")\n"); + << "... (caller:" << Call.getCaller()->getName() + << ")\n"); - InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, - *Callee, Call, Params); + InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, + GetAssumptionCache, GetBFI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); // Check if there was a reason to force inlining or no inlining. - if (!ShouldInline && CA.getCost() < CA.getThreshold()) - return InlineCost::getNever(ShouldInline.message); - if (ShouldInline && CA.getCost() >= CA.getThreshold()) + if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold()) + return InlineCost::getNever(ShouldInline.getFailureReason()); + if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold()) return InlineCost::getAlways("empty function"); return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); @@ -2195,14 +2422,14 @@ InlineResult llvm::isInlineViable(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches. if (isa<IndirectBrInst>(BI->getTerminator())) - return "contains indirect branches"; + return InlineResult::failure("contains indirect branches"); // Disallow inlining of blockaddresses which are used by non-callbr // instructions. if (BI->hasAddressTaken()) for (User *U : BlockAddress::get(&*BI)->users()) if (!isa<CallBrInst>(*U)) - return "blockaddress used outside of callbr"; + return InlineResult::failure("blockaddress used outside of callbr"); for (auto &II : *BI) { CallBase *Call = dyn_cast<CallBase>(&II); @@ -2211,13 +2438,13 @@ InlineResult llvm::isInlineViable(Function &F) { // Disallow recursive calls. if (&F == Call->getCalledFunction()) - return "recursive call"; + return InlineResult::failure("recursive call"); // Disallow calls which expose returns-twice to a function not previously // attributed as such. if (!ReturnsTwice && isa<CallInst>(Call) && cast<CallInst>(Call)->canReturnTwice()) - return "exposes returns-twice attribute"; + return InlineResult::failure("exposes returns-twice attribute"); if (Call->getCalledFunction()) switch (Call->getCalledFunction()->getIntrinsicID()) { @@ -2226,20 +2453,23 @@ InlineResult llvm::isInlineViable(Function &F) { case llvm::Intrinsic::icall_branch_funnel: // Disallow inlining of @llvm.icall.branch.funnel because current // backend can't separate call targets from call arguments. - return "disallowed inlining of @llvm.icall.branch.funnel"; + return InlineResult::failure( + "disallowed inlining of @llvm.icall.branch.funnel"); case llvm::Intrinsic::localescape: // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. - return "disallowed inlining of @llvm.localescape"; + return InlineResult::failure( + "disallowed inlining of @llvm.localescape"); case llvm::Intrinsic::vastart: // Disallow inlining of functions that initialize VarArgs with // va_start. 
- return "contains VarArgs initialized with va_start"; + return InlineResult::failure( + "contains VarArgs initialized with va_start"); } } } - return true; + return InlineResult::success(); } // APIs to create InlineParams based on command line flags and/or other @@ -2299,7 +2529,7 @@ InlineParams llvm::getInlineParams(int Threshold) { } InlineParams llvm::getInlineParams() { - return getInlineParams(InlineThreshold); + return getInlineParams(DefaultThreshold); } // Compute the default threshold for inlining based on the opt level and the @@ -2312,7 +2542,7 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, return InlineConstants::OptSizeThreshold; if (SizeOptLevel == 2) // -Oz return InlineConstants::OptMinSizeThreshold; - return InlineThreshold; + return DefaultThreshold; } InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { @@ -2325,3 +2555,40 @@ InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; return Params; } + +PreservedAnalyses +InlineCostAnnotationPrinterPass::run(Function &F, + FunctionAnalysisManager &FAM) { + PrintInstructionComments = true; + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&]( + Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + Module *M = F.getParent(); + ProfileSummaryInfo PSI(*M); + DataLayout DL(M); + TargetTransformInfo TTI(DL); + // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. + // In the current implementation, the type of InlineParams doesn't matter as + // the pass serves only for verification of inliner's decisions. + // We can add a flag which determines InlineParams for this run. Right now, + // the default InlineParams are used. + const InlineParams Params = llvm::getInlineParams(); + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + Function *CalledFunction = CI->getCalledFunction(); + if (!CalledFunction || CalledFunction->isDeclaration()) + continue; + OptimizationRemarkEmitter ORE(CalledFunction); + InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, + GetAssumptionCache, nullptr, &PSI, &ORE); + ICCA.analyze(); + OS << " Analyzing call of " << CalledFunction->getName() + << "... (caller:" << CI->getCaller()->getName() << ")\n"; + ICCA.print(); + } + } + } + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp b/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp new file mode 100644 index 0000000000000..90f521bbaab48 --- /dev/null +++ b/llvm/lib/Analysis/InlineFeaturesAnalysis.cpp @@ -0,0 +1,41 @@ +//===- InlineFeaturesAnalysis.cpp - Feature extraction for ML Policies ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis extracting function features, which may be +// used by ML-driven policies, for example. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineFeaturesAnalysis.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +AnalysisKey InlineFeaturesAnalysis::Key; + +InlineFeaturesAnalysis::Result +InlineFeaturesAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { + Result Ret; + Ret.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); + for (const auto &BB : F) { + ++Ret.BasicBlockCount; + if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { + if (BI->isConditional()) + Ret.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors(); + } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) + Ret.BlocksReachedFromConditionalInstruction += + (SI->getNumCases() + (nullptr != SI->getDefaultDest())); + for (const auto &I : BB) + if (auto *CS = dyn_cast<CallBase>(&I)) { + const auto *Callee = CS->getCalledFunction(); + if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) + ++Ret.DirectCallsToDefinedFunctions; + } + } + return Ret; +}
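The feature set collected above (block counts, successors reached from conditional terminators, uses, and direct calls to defined functions) is exposed through the usual new-pass-manager analysis interface. The following is an illustrative sketch only, not part of the patch: the pass name InlineFeaturesPrinterPass and its registration are assumptions made for the example.
#include "llvm/Analysis/InlineFeaturesAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical printer pass; assumes InlineFeaturesAnalysis has been
// registered with the FunctionAnalysisManager (e.g. in PassRegistry.def).
struct InlineFeaturesPrinterPass
    : llvm::PassInfoMixin<InlineFeaturesPrinterPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    const auto &Features = FAM.getResult<llvm::InlineFeaturesAnalysis>(F);
    // Print a few of the extracted features for inspection.
    llvm::errs() << F.getName() << ": blocks=" << Features.BasicBlockCount
                 << ", uses=" << Features.Uses
                 << ", direct calls to defined functions="
                 << Features.DirectCallsToDefinedFunctions << "\n";
    return llvm::PreservedAnalyses::all();
  }
};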
\ No newline at end of file diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp new file mode 100644 index 0000000000000..ebc59879d3577 --- /dev/null +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -0,0 +1,299 @@ +//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements feature and label extraction for offline supervised learning +// of a IR to native size model. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" + +#ifdef LLVM_HAVE_TF_API +#include "llvm/Analysis/Utils/TFUtils.h" +#endif +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include <algorithm> +#include <deque> + +using namespace llvm; + +AnalysisKey InlineSizeEstimatorAnalysis::Key; + +#define DEBUG_TYPE "inline-size-estimator" + +#ifdef LLVM_HAVE_TF_API +cl::opt<std::string> TFIR2NativeModelPath( + "ml-inliner-ir2native-model", cl::Hidden, + cl::desc("Path to saved model evaluating native size from IR.")); + +namespace { +unsigned getMaxInstructionID() { +#define LAST_OTHER_INST(NR) return NR; +#include "llvm/IR/Instruction.def" +} + +class IRToNativeSizeLearning { +public: + enum class NamedFeatureIndex : size_t { + InitialSize, + Blocks, + Calls, + IsLocal, + IsLinkOnceODR, + IsLinkOnce, + Loops, + MaxLoopDepth, + MaxDomTreeLevel, + + NumNamedFeatures + }; + static const size_t NumNamedFeatures = + static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); + struct FunctionFeatures { + static std::vector<std::pair<size_t, size_t>> + ImportantInstructionSuccessions; + static const size_t FeatureCount; + + std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; + std::vector<int32_t> InstructionHistogram; + std::vector<int32_t> InstructionPairHistogram; + + void fillTensor(int32_t *Ptr) const; + int32_t &operator[](NamedFeatureIndex Pos) { + return NamedFeatures[static_cast<size_t>(Pos)]; + } + }; + IRToNativeSizeLearning() = default; + + static FunctionFeatures getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM); + +private: + /// Sort once the feature tuples. + struct SortFeatureTuples { + bool IsSorted = false; + SortFeatureTuples() { + std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end()); + IsSorted = true; + } + }; + + static llvm::ManagedStatic<SortFeatureTuples> TupleSorter; + + static bool ensureSortedTuples() { return TupleSorter->IsSorted; } +}; +llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples> + IRToNativeSizeLearning::TupleSorter; + +// This is a point in time - we determined including these pairs of +// consecutive instructions (in the IR layout available at inline time) as +// features improves the model performance. 
We want to move away from manual +// feature selection. +// The vector is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted +std::vector<std::pair<size_t, size_t>> + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = + {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, + {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, + {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, + {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, + {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, + {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, + {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, + {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, + {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, + {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, + {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, + {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, + {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, + {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, + {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, + {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, + {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, + {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, + {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, + {1, 27}, {49, 1}, {1, 8}, {56, 2}}; + +// We have: 9 calculated features (the features here); 1 feature for each +// instruction opcode; and 1 feature for each manually-identified sequence. +// For the latter 2, we build a histogram: we count the number of +// occurrences of each instruction opcode or succession of instructions, +// respectively. +// Note that instruction opcodes start from 1. For convenience, we also have an +// always 0 feature for the '0' opcode, hence the extra 1. 
+const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions + .size() + + getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; + +size_t getSize(Function &F, TargetTransformInfo &TTI) { + size_t Ret = 0; + for (auto &BB : F) + for (auto &I : BB) + Ret += TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + return Ret; +} + +size_t getSize(Function &F, FunctionAnalysisManager &FAM) { + auto &TTI = FAM.getResult<TargetIRAnalysis>(F); + return getSize(F, TTI); +} + +unsigned getMaxDominatorTreeDepth(const Function &F, + const DominatorTree &Tree) { + unsigned Ret = 0; + for (auto &BB : F) + if (auto *TN = Tree.getNode(&BB)) + Ret = std::max(Ret, TN->getLevel()); + return Ret; +} +} // namespace + +IRToNativeSizeLearning::FunctionFeatures +IRToNativeSizeLearning::getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM) { + assert(ensureSortedTuples() && "expected lazy initialization"); + + auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); + FunctionFeatures FF; + size_t InstrCount = getMaxInstructionID() + 1; + FF.InstructionHistogram.resize(InstrCount); + + FF.InstructionPairHistogram.resize( + FunctionFeatures::ImportantInstructionSuccessions.size()); + + auto StartID = 0; + auto LastID = StartID; + auto getPairIndex = [](size_t a, size_t b) { + auto I = + std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end(), + std::make_pair(a, b)); + if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) + return -1; + return static_cast<int>(std::distance( + FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); + }; + + // We don't want debug calls, because they'd just add noise. 
+ for (auto &BB : F) { + for (auto I = BB.instructionsWithoutDebug().begin(), + E = BB.instructionsWithoutDebug().end(); + I != E; ++I) { + auto ID = I->getOpcode(); + + ++FF.InstructionHistogram[ID]; + int PairIndex = getPairIndex(LastID, ID); + if (PairIndex >= 0) + ++FF.InstructionPairHistogram[PairIndex]; + LastID = ID; + if (isa<CallBase>(*I)) + ++FF[NamedFeatureIndex::Calls]; + } + } + + FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); + FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); + FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); + FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); + FF[NamedFeatureIndex::Blocks] = + std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); + auto &LI = FAM.getResult<LoopAnalysis>(F); + FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); + for (auto &L : LI) + FF[NamedFeatureIndex::MaxLoopDepth] = + std::max(FF[NamedFeatureIndex::MaxLoopDepth], + static_cast<int32_t>(L->getLoopDepth())); + FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); + return FF; +} + +void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { + std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); + Ptr += NamedFeatures.size(); + std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); + Ptr += InstructionHistogram.size(); + std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), + Ptr); +} + +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { + return !TFIR2NativeModelPath.empty(); +} + +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { + if (!isEvaluatorRequested()) { + return; + } + std::vector<std::string> InputNames{"serving_default_input_1"}; + std::vector<std::string> OutputName{"StatefulPartitionedCall"}; + Evaluator = std::make_unique<TFModelEvaluator>( + TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); + if (!Evaluator || !Evaluator->isValid()) { + Evaluator.reset(); + return; + } + static const std::vector<int64_t> Dim{ + 1, static_cast<int64_t>( + IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; + + Evaluator->initInput<int32_t>(0, Dim); +} + +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + if (!Evaluator) + return None; + auto Features = IRToNativeSizeLearning::getFunctionFeatures( + const_cast<Function &>(F), FAM); + int32_t *V = Evaluator->getInput<int32_t>(0); + Features.fillTensor(V); + auto ER = Evaluator->evaluate(); + if (!ER) + return None; + float Ret = *ER->getTensorValue<float>(0); + if (Ret < 0.0) + Ret = 0.0; + return static_cast<size_t>(Ret); +} + +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&Other) + : Evaluator(std::move(Other.Evaluator)) {} + +#else +namespace llvm { +class TFModelEvaluator {}; +} // namespace llvm +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&) {} +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + return None; +} +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } +#endif
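For reference, a minimal sketch (not part of the patch) of how a client might query the size estimator. The pass name NativeSizePrinterPass is hypothetical, and the sketch assumes the analysis is registered and a saved model was supplied via -ml-inliner-ir2native-model, since the result is otherwise None.
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical example pass; the analysis result is an Optional native-size
// estimate, which is None when no TensorFlow model is available or when
// evaluation fails.
struct NativeSizePrinterPass : llvm::PassInfoMixin<NativeSizePrinterPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    auto SizeEstimate = FAM.getResult<llvm::InlineSizeEstimatorAnalysis>(F);
    if (SizeEstimate)
      llvm::errs() << F.getName() << ": estimated native size = "
                   << *SizeEstimate << "\n";
    return llvm::PreservedAnalyses::all();
  }
};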
\ No newline at end of file diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp index 415797d6a3783..c26cdf2266da8 100644 --- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -59,7 +59,7 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction( const Instruction *Insn) { const Instruction *MaybeFirstSpecial = getFirstSpecialInstruction(Insn->getParent()); - return MaybeFirstSpecial && OI.dominates(MaybeFirstSpecial, Insn); + return MaybeFirstSpecial && MaybeFirstSpecial->comesBefore(Insn); } void InstructionPrecedenceTracking::fill(const BasicBlock *BB) { @@ -104,18 +104,14 @@ void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst, const BasicBlock *BB) { if (isSpecialInstruction(Inst)) FirstSpecialInsts.erase(BB); - OI.invalidateBlock(BB); } void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) { if (isSpecialInstruction(Inst)) FirstSpecialInsts.erase(Inst->getParent()); - OI.invalidateBlock(Inst->getParent()); } void InstructionPrecedenceTracking::clear() { - for (auto It : FirstSpecialInsts) - OI.invalidateBlock(It.first); FirstSpecialInsts.clear(); #ifndef NDEBUG // The map should be valid after clearing (at least empty). @@ -130,26 +126,7 @@ bool ImplicitControlFlowTracking::isSpecialInstruction( // to avoid wrong assumptions of the sort "if A is executed and B post-dominates // A, then B is also executed". This is not true if there is an implicit // control flow instruction (e.g. a guard) between them. - // - // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false - // for volatile stores and loads because they can trap. The discussion on - // whether or not it is correct is still ongoing. We might want to get rid - // of this logic in the future. Anyways, trapping instructions shouldn't - // introduce implicit control flow, so we explicitly allow them here. This - // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed. - if (isGuaranteedToTransferExecutionToSuccessor(Insn)) - return false; - if (isa<LoadInst>(Insn)) { - assert(cast<LoadInst>(Insn)->isVolatile() && - "Non-volatile load should transfer execution to successor!"); - return false; - } - if (isa<StoreInst>(Insn)) { - assert(cast<StoreInst>(Insn)->isVolatile() && - "Non-volatile store should transfer execution to successor!"); - return false; - } - return true; + return !isGuaranteedToTransferExecutionToSuccessor(Insn); } bool MemoryWriteTracking::isSpecialInstruction( diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d7510c8991013..0975a65d183e4 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -222,7 +222,7 @@ static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes.
if (I->getParent() == &I->getFunction()->getEntryBlock() && - !isa<InvokeInst>(I)) + !isa<InvokeInst>(I) && !isa<CallBrInst>(I)) return true; return false; @@ -707,9 +707,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth()); Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset); - if (V->getType()->isVectorTy()) - return ConstantVector::getSplat(V->getType()->getVectorNumElements(), - OffsetIntPtr); + if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) + return ConstantVector::getSplat(VecTy->getElementCount(), OffsetIntPtr); return OffsetIntPtr; } @@ -943,11 +942,12 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { if (match(Op1, m_Zero())) return UndefValue::get(Ty); - // If any element of a constant divisor vector is zero or undef, the whole op - // is undef. + // If any element of a constant divisor fixed width vector is zero or undef, + // the whole op is undef. auto *Op1C = dyn_cast<Constant>(Op1); - if (Op1C && Ty->isVectorTy()) { - unsigned NumElts = Ty->getVectorNumElements(); + auto *VTy = dyn_cast<FixedVectorType>(Ty); + if (Op1C && VTy) { + unsigned NumElts = VTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = Op1C->getAggregateElement(i); if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt))) @@ -1222,7 +1222,8 @@ static bool isUndefShift(Value *Amount) { // If all lanes of a vector shift are undefined the whole shift is. if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) { - for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) + for (unsigned I = 0, E = cast<VectorType>(C->getType())->getNumElements(); + I != E; ++I) if (!isUndefShift(C->getAggregateElement(I))) return false; return true; @@ -1429,9 +1430,6 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (match(UnsignedICmp, m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) && ICmpInst::isUnsigned(UnsignedPred)) { - if (UnsignedICmp->getOperand(0) != A) - UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); - // A >=/<= B || (A - B) != 0 <--> true if ((UnsignedPred == ICmpInst::ICMP_UGE || UnsignedPred == ICmpInst::ICMP_ULE) && @@ -1461,9 +1459,6 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, // Y < A || Y == 0 --> Y < A iff B != 0 if (match(UnsignedICmp, m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { - if (UnsignedICmp->getOperand(0) != Y) - UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); - if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && EqPred == ICmpInst::ICMP_NE && isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) @@ -1485,10 +1480,11 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, else return nullptr; - // X < Y && Y != 0 --> X < Y - // X < Y || Y != 0 --> Y != 0 - if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) - return IsAnd ? UnsignedICmp : ZeroICmp; + // X > Y && Y == 0 --> Y == 0 iff X != 0 + // X > Y || Y == 0 --> X > Y iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? ZeroICmp : UnsignedICmp; // X <= Y && Y != 0 --> X <= Y iff X != 0 // X <= Y || Y != 0 --> Y != 0 iff X != 0 @@ -1496,17 +1492,21 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) return IsAnd ? 
UnsignedICmp : ZeroICmp; + // The transforms below here are expected to be handled more generally with + // simplifyAndOrOfICmpsWithLimitConst() or in InstCombine's + // foldAndOrOfICmpsWithConstEq(). If we are looking to trim optimizer overlap, + // these are candidates for removal. + + // X < Y && Y != 0 --> X < Y + // X < Y || Y != 0 --> Y != 0 + if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) + return IsAnd ? UnsignedICmp : ZeroICmp; + // X >= Y && Y == 0 --> Y == 0 // X >= Y || Y == 0 --> X >= Y if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ) return IsAnd ? ZeroICmp : UnsignedICmp; - // X > Y && Y == 0 --> Y == 0 iff X != 0 - // X > Y || Y == 0 --> X > Y iff X != 0 - if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && - isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) - return IsAnd ? ZeroICmp : UnsignedICmp; - // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) @@ -1695,6 +1695,64 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, return nullptr; } +/// Try to eliminate compares with signed or unsigned min/max constants. +static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool IsAnd) { + // Canonicalize an equality compare as Cmp0. + if (Cmp1->isEquality()) + std::swap(Cmp0, Cmp1); + if (!Cmp0->isEquality()) + return nullptr; + + // The equality compare must be against a constant. Convert the 'null' pointer + // constant to an integer zero value. + APInt MinMaxC; + const APInt *C; + if (match(Cmp0->getOperand(1), m_APInt(C))) + MinMaxC = *C; + else if (isa<ConstantPointerNull>(Cmp0->getOperand(1))) + MinMaxC = APInt::getNullValue(8); + else + return nullptr; + + // The non-equality compare must include a common operand (X). Canonicalize + // the common operand as operand 0 (the predicate is swapped if the common + // operand was operand 1). + ICmpInst::Predicate Pred0 = Cmp0->getPredicate(); + Value *X = Cmp0->getOperand(0); + ICmpInst::Predicate Pred1; + if (!match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value())) || + ICmpInst::isEquality(Pred1)) + return nullptr; + + // DeMorganize if this is 'or': P0 || P1 --> !P0 && !P1. + if (!IsAnd) { + Pred0 = ICmpInst::getInversePredicate(Pred0); + Pred1 = ICmpInst::getInversePredicate(Pred1); + } + + // Normalize to unsigned compare and unsigned min/max value. 
+ // Example for 8-bit: -128 + 128 -> 0; 127 + 128 -> 255 + if (ICmpInst::isSigned(Pred1)) { + Pred1 = ICmpInst::getUnsignedPredicate(Pred1); + MinMaxC += APInt::getSignedMinValue(MinMaxC.getBitWidth()); + } + + // (X != MAX) && (X < Y) --> X < Y + // (X == MAX) || (X >= Y) --> X >= Y + if (MinMaxC.isMaxValue()) + if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) + return Cmp1; + + // (X != MIN) && (X > Y) --> X > Y + // (X == MIN) || (X <= Y) --> X <= Y + if (MinMaxC.isMinValue()) + if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_UGT) + return Cmp1; + + return nullptr; +} + static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, const SimplifyQuery &Q) { if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) @@ -1710,6 +1768,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) return X; + if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, true)) + return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; @@ -1783,6 +1844,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) return X; + if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, false)) + return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; @@ -2131,7 +2195,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getAllOnesValue(Op1->getType()); // A | ~(A & ?) = -1 - if (match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value())))) + if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value())))) return Constant::getAllOnesValue(Op0->getType()); Value *A, *B; @@ -2347,10 +2411,9 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, RHS = RHS->stripPointerCasts(); // A non-null pointer is not equal to a null pointer. - if (llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr, - IIQ.UseInstrInfo) && - isa<ConstantPointerNull>(RHS) && - (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) && + llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr, + IIQ.UseInstrInfo)) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); @@ -3218,6 +3281,30 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, return nullptr; } +static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate, + Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + // Gracefully handle instructions that have not been inserted yet. + if (!Q.AC || !Q.CxtI || !Q.CxtI->getParent()) + return nullptr; + + for (Value *AssumeBaseOp : {LHS, RHS}) { + for (auto &AssumeVH : Q.AC->assumptionsFor(AssumeBaseOp)) { + if (!AssumeVH) + continue; + + CallInst *Assume = cast<CallInst>(AssumeVH); + if (Optional<bool> Imp = + isImpliedCondition(Assume->getArgOperand(0), Predicate, LHS, RHS, + Q.DL)) + if (isValidAssumeForContext(Assume, Q.CxtI, Q.DT)) + return ConstantInt::get(GetCompareTy(LHS), *Imp); + } + } + + return nullptr; +} + /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3318,6 +3405,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, MaxRecurse-1)) return V; } + // Fold (zext X) ule (sext X), (zext X) sge (sext X) to true. 
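As a standalone illustration (editorial aside, not part of the patch) of why the zext/sext comparison folds named in the comment above always hold: zero- and sign-extension of the same 8-bit value only differ when the sign bit is set, and in that case the sign-extended value is larger as unsigned and smaller as signed. A minimal C++ check, exhaustive over i8:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v <= 255; ++v) {
    uint8_t x = static_cast<uint8_t>(v);
    uint16_t z = x;                          // zext i8 -> i16
    int16_t s = static_cast<int8_t>(x);      // sext i8 -> i16
    assert(z <= static_cast<uint16_t>(s));   // (zext X) ule (sext X) always true
    assert(static_cast<int16_t>(z) >= s);    // (zext X) sge (sext X) always true
  }
  return 0;
}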
+ else if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { + if (SrcOp == RI->getOperand(0)) { + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_SGE) + return ConstantInt::getTrue(ITy); + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SLT) + return ConstantInt::getFalse(ITy); + } + } // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended // too. If not, then try to deduce the result of the comparison. else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { @@ -3377,6 +3473,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q, MaxRecurse-1)) return V; } + // Fold (sext X) uge (zext X), (sext X) sle (zext X) to true. + else if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { + if (SrcOp == RI->getOperand(0)) { + if (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_SLE) + return ConstantInt::getTrue(ITy); + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SGT) + return ConstantInt::getFalse(ITy); + } + } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended // too. If not, then try to deduce the result of the comparison. else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { @@ -3452,6 +3557,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse)) return V; + if (Value *V = simplifyICmpWithDominatingAssume(Pred, LHS, RHS, Q)) + return V; + // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. if (LHS->getType()->isPointerTy()) @@ -3487,7 +3595,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); Constant *NewRHS = ConstantExpr::getGetElementPtr( GLHS->getSourceElementType(), Null, IndicesRHS); - return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); + Constant *NewICmp = ConstantExpr::getICmp(Pred, NewLHS, NewRHS); + return ConstantFoldConstant(NewICmp, Q.DL); } } } @@ -3622,9 +3731,9 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Check comparison of [minnum/maxnum with constant] with other constant. const APFloat *C2; if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) && - C2->compare(*C) == APFloat::cmpLessThan) || + *C2 < *C) || (match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) && - C2->compare(*C) == APFloat::cmpGreaterThan)) { + *C2 > *C)) { bool IsMaxNum = cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum; // The ordered relationship and minnum/maxnum guarantee that we do not @@ -4009,11 +4118,47 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, if (TrueVal == FalseVal) return TrueVal; - if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X + // If the true or false value is undef, we can fold to the other value as + // long as the other value isn't poison. 
+ // select ?, undef, X -> X + if (isa<UndefValue>(TrueVal) && + isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.CxtI, Q.DT)) return FalseVal; - if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X + // select ?, X, undef -> X + if (isa<UndefValue>(FalseVal) && + isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.CxtI, Q.DT)) return TrueVal; + // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' + Constant *TrueC, *FalseC; + if (TrueVal->getType()->isVectorTy() && match(TrueVal, m_Constant(TrueC)) && + match(FalseVal, m_Constant(FalseC))) { + unsigned NumElts = cast<VectorType>(TrueC->getType())->getNumElements(); + SmallVector<Constant *, 16> NewC; + for (unsigned i = 0; i != NumElts; ++i) { + // Bail out on incomplete vector constants. + Constant *TEltC = TrueC->getAggregateElement(i); + Constant *FEltC = FalseC->getAggregateElement(i); + if (!TEltC || !FEltC) + break; + + // If the elements match (undef or not), that value is the result. If only + // one element is undef, choose the defined element as the safe result. + if (TEltC == FEltC) + NewC.push_back(TEltC); + else if (isa<UndefValue>(TEltC) && + isGuaranteedNotToBeUndefOrPoison(FEltC)) + NewC.push_back(FEltC); + else if (isa<UndefValue>(FEltC) && + isGuaranteedNotToBeUndefOrPoison(TEltC)) + NewC.push_back(TEltC); + else + break; + } + if (NewC.size() == NumElts) + return ConstantVector::get(NewC); + } + if (Value *V = simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; @@ -4052,20 +4197,22 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) - GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType())) - GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); if (isa<UndefValue>(Ops[0])) return UndefValue::get(GEPTy); + bool IsScalableVec = isa<ScalableVectorType>(SrcTy); + if (Ops.size() == 2) { // getelementptr P, 0 -> P. if (match(Ops[1], m_Zero()) && Ops[0]->getType() == GEPTy) return Ops[0]; Type *Ty = SrcTy; - if (Ty->isSized()) { + if (!IsScalableVec && Ty->isSized()) { Value *P; uint64_t C; uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty); @@ -4113,7 +4260,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, } } - if (Q.DL.getTypeAllocSize(LastType) == 1 && + if (!IsScalableVec && Q.DL.getTypeAllocSize(LastType) == 1 && all_of(Ops.slice(1).drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { unsigned IdxWidth = @@ -4145,9 +4292,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), Ops.slice(1)); - if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) - return CEFolded; - return CE; + return ConstantFoldConstant(CE, Q.DL); } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, @@ -4199,10 +4344,10 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, if (VecC && ValC && IdxC) return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC); - // Fold into undef if index is out of bounds. + // For fixed-length vector, fold into undef if index is out of bounds. 
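A rough, standalone sketch (hypothetical types, not LLVM's Constant API) of the lane-by-lane merge used in the partial-undef vector select fold above; std::nullopt stands in for an undef lane, and the not-undef/poison check on the chosen element is omitted for brevity:

#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

using Lane = std::optional<int>;

// Merge two constant vector operands of a select; return a folded vector only
// if every lane can be resolved.
static std::optional<std::vector<Lane>>
mergeSelectConstants(const std::vector<Lane> &T, const std::vector<Lane> &F) {
  std::vector<Lane> Out;
  for (std::size_t i = 0; i != T.size(); ++i) {
    if (T[i] == F[i])
      Out.push_back(T[i]);          // lanes agree (undef or not)
    else if (!T[i])
      Out.push_back(F[i]);          // true lane undef: take the defined one
    else if (!F[i])
      Out.push_back(T[i]);          // false lane undef: take the defined one
    else
      return std::nullopt;          // lanes conflict: no fold
  }
  return Out;
}

int main() {
  auto R = mergeSelectConstants({1, std::nullopt}, {1, 2});
  if (R)
    std::printf("<%d, %d>\n", *(*R)[0], *(*R)[1]);   // prints: <1, 2>
  return 0;
}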
if (auto *CI = dyn_cast<ConstantInt>(Idx)) { - uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements(); - if (CI->uge(NumElements)) + if (isa<FixedVectorType>(Vec->getType()) && + CI->uge(cast<FixedVectorType>(Vec->getType())->getNumElements())) return UndefValue::get(Vec->getType()); } @@ -4210,15 +4355,15 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, if (isa<UndefValue>(Idx)) return UndefValue::get(Vec->getType()); - // Inserting an undef scalar? Assume it is the same value as the existing - // vector element. - if (isa<UndefValue>(Val)) + // If the scalar is undef, and there is no risk of propagating poison from the + // vector value, simplify to the vector value. + if (isa<UndefValue>(Val) && isGuaranteedNotToBeUndefOrPoison(Vec)) return Vec; // If we are extracting a value from a vector, then inserting it into the same // place, that's the input vector: // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec - if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx)))) + if (match(Val, m_ExtractElt(m_Specific(Vec), m_Specific(Idx)))) return Vec; return nullptr; @@ -4258,6 +4403,7 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, /// If not, this returns null. static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, unsigned) { + auto *VecVTy = cast<VectorType>(Vec->getType()); if (auto *CVec = dyn_cast<Constant>(Vec)) { if (auto *CIdx = dyn_cast<Constant>(Idx)) return ConstantFoldExtractElementInstruction(CVec, CIdx); @@ -4267,15 +4413,16 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ return Splat; if (isa<UndefValue>(Vec)) - return UndefValue::get(Vec->getType()->getVectorElementType()); + return UndefValue::get(VecVTy->getElementType()); } // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) { - if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements())) - // definitely out of bounds, thus undefined result - return UndefValue::get(Vec->getType()->getVectorElementType()); + // For fixed-length vector, fold into undef if index is out of bounds. + if (isa<FixedVectorType>(VecVTy) && + IdxC->getValue().uge(VecVTy->getNumElements())) + return UndefValue::get(VecVTy->getElementType()); if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; } @@ -4283,7 +4430,7 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ // An undef extract index can be arbitrarily chosen to be an out-of-range // index value, which would result in the instruction being undef. if (isa<UndefValue>(Idx)) - return UndefValue::get(Vec->getType()->getVectorElementType()); + return UndefValue::get(VecVTy->getElementType()); return nullptr; } @@ -4380,7 +4527,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, return nullptr; // The mask value chooses which source operand we need to look at next. 
- int InVecNumElts = Op0->getType()->getVectorNumElements(); + int InVecNumElts = cast<VectorType>(Op0->getType())->getNumElements(); int RootElt = MaskVal; Value *SourceOp = Op0; if (MaskVal >= InVecNumElts) { @@ -4416,59 +4563,68 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, return RootVec; } -static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const SimplifyQuery &Q, +static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, + ArrayRef<int> Mask, Type *RetTy, + const SimplifyQuery &Q, unsigned MaxRecurse) { - if (isa<UndefValue>(Mask)) + if (all_of(Mask, [](int Elem) { return Elem == UndefMaskElem; })) return UndefValue::get(RetTy); - Type *InVecTy = Op0->getType(); - unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); - unsigned InVecNumElts = InVecTy->getVectorNumElements(); + auto *InVecTy = cast<VectorType>(Op0->getType()); + unsigned MaskNumElts = Mask.size(); + ElementCount InVecEltCount = InVecTy->getElementCount(); + + bool Scalable = InVecEltCount.Scalable; SmallVector<int, 32> Indices; - ShuffleVectorInst::getShuffleMask(Mask, Indices); - assert(MaskNumElts == Indices.size() && - "Size of Indices not same as number of mask elements?"); + Indices.assign(Mask.begin(), Mask.end()); // Canonicalization: If mask does not select elements from an input vector, // replace that input vector with undef. - bool MaskSelects0 = false, MaskSelects1 = false; - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Indices[i] == -1) - continue; - if ((unsigned)Indices[i] < InVecNumElts) - MaskSelects0 = true; - else - MaskSelects1 = true; + if (!Scalable) { + bool MaskSelects0 = false, MaskSelects1 = false; + unsigned InVecNumElts = InVecEltCount.Min; + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Indices[i] == -1) + continue; + if ((unsigned)Indices[i] < InVecNumElts) + MaskSelects0 = true; + else + MaskSelects1 = true; + } + if (!MaskSelects0) + Op0 = UndefValue::get(InVecTy); + if (!MaskSelects1) + Op1 = UndefValue::get(InVecTy); } - if (!MaskSelects0) - Op0 = UndefValue::get(InVecTy); - if (!MaskSelects1) - Op1 = UndefValue::get(InVecTy); auto *Op0Const = dyn_cast<Constant>(Op0); auto *Op1Const = dyn_cast<Constant>(Op1); - // If all operands are constant, constant fold the shuffle. - if (Op0Const && Op1Const) + // If all operands are constant, constant fold the shuffle. This + // transformation depends on the value of the mask which is not known at + // compile time for scalable vectors + if (!Scalable && Op0Const && Op1Const) return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); // Canonicalization: if only one input vector is constant, it shall be the - // second one. - if (Op0Const && !Op1Const) { + // second one. This transformation depends on the value of the mask which + // is not known at compile time for scalable vectors + if (!Scalable && Op0Const && !Op1Const) { std::swap(Op0, Op1); - ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); + ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min); } // A splat of an inserted scalar constant becomes a vector constant: // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...> // NOTE: We may have commuted above, so analyze the updated Indices, not the // original mask constant. 
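For readers less familiar with shuffle masks, a standalone sketch (hypothetical helper, not the LLVM API) of the "which inputs does the mask actually read" scan behind the canonicalization above; -1 models an undef lane:

#include <cstdio>
#include <vector>

// Given a shuffle mask over two inputs of NumElts lanes each, decide whether
// each input is read at all; an unread input can be replaced with undef.
static void analyzeMask(const std::vector<int> &Mask, unsigned NumElts,
                        bool &Selects0, bool &Selects1) {
  Selects0 = Selects1 = false;
  for (int M : Mask) {
    if (M < 0)
      continue;                              // undef lane selects nothing
    if (static_cast<unsigned>(M) < NumElts)
      Selects0 = true;                       // lane comes from the first input
    else
      Selects1 = true;                       // lane comes from the second input
  }
}

int main() {
  bool S0, S1;
  analyzeMask({0, 1, -1, 3}, 4, S0, S1);     // only the first input is used
  std::printf("%d %d\n", S0, S1);            // prints: 1 0
  analyzeMask({4, 5, 6, 7}, 4, S0, S1);      // only the second input is used
  std::printf("%d %d\n", S0, S1);            // prints: 0 1
  return 0;
}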
+ // NOTE: This transformation depends on the value of the mask which is not + // known at compile time for scalable vectors Constant *C; ConstantInt *IndexC; - if (match(Op0, m_InsertElement(m_Value(), m_Constant(C), - m_ConstantInt(IndexC)))) { + if (!Scalable && match(Op0, m_InsertElt(m_Value(), m_Constant(C), + m_ConstantInt(IndexC)))) { // Match a splat shuffle mask of the insert index allowing undef elements. int InsertIndex = IndexC->getZExtValue(); if (all_of(Indices, [InsertIndex](int MaskElt) { @@ -4489,9 +4645,14 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) if (isa<UndefValue>(Op1) && RetTy == InVecTy && - OpShuf->getMask()->getSplatValue()) + is_splat(OpShuf->getShuffleMask())) return Op0; + // All remaining transformation depend on the value of the mask, which is + // not known at compile time for scalable vectors. + if (Scalable) + return nullptr; + // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. // TODO: Should we allow this? @@ -4517,8 +4678,9 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, } /// Given operands for a ShuffleVectorInst, fold the result or return null. -Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const SimplifyQuery &Q) { +Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, + ArrayRef<int> Mask, Type *RetTy, + const SimplifyQuery &Q) { return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } @@ -4562,14 +4724,24 @@ static Constant *propagateNaN(Constant *In) { /// Perform folds that are common to any floating-point operation. This implies /// transforms based on undef/NaN because the operation itself makes no /// difference to the result. -static Constant *simplifyFPOp(ArrayRef<Value *> Ops) { - if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); })) - return ConstantFP::getNaN(Ops[0]->getType()); - - for (Value *V : Ops) - if (match(V, m_NaN())) +static Constant *simplifyFPOp(ArrayRef<Value *> Ops, + FastMathFlags FMF = FastMathFlags()) { + for (Value *V : Ops) { + bool IsNan = match(V, m_NaN()); + bool IsInf = match(V, m_Inf()); + bool IsUndef = match(V, m_Undef()); + + // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand + // (an undef operand can be chosen to be Nan/Inf), then the result of + // this operation is poison. That result can be relaxed to undef. 
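A small standalone reminder (not part of the patch) of the IEEE behaviour this relies on: without 'nnan'/'ninf' a NaN operand simply propagates, which is what the NaN-propagation path below preserves; with those flags the operand is disallowed, so the result is folded to undef instead. Compile without fast-math for the asserts to be meaningful:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double nan = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(nan + 1.0));             // fadd with a NaN operand stays NaN
  assert(std::isnan(nan * 0.0));             // fmul likewise
  assert(std::isnan(std::fmod(nan, 2.0)));   // frem likewise
  return 0;
}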
+ if (FMF.noNaNs() && (IsNan || IsUndef)) + return UndefValue::get(V->getType()); + if (FMF.noInfs() && (IsInf || IsUndef)) + return UndefValue::get(V->getType()); + + if (IsUndef || IsNan) return propagateNaN(cast<Constant>(V)); - + } return nullptr; } @@ -4580,7 +4752,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1})) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF)) return C; // fadd X, -0 ==> X @@ -4627,7 +4799,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1})) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF)) return C; // fsub X, +0 ==> X @@ -4669,7 +4841,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = simplifyFPOp({Op0, Op1})) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF)) return C; // fmul X, 1.0 ==> X @@ -4736,7 +4908,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1})) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF)) return C; // X / 1.0 -> X @@ -4781,7 +4953,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPOp({Op0, Op1})) + if (Constant *C = simplifyFPOp({Op0, Op1}, FMF)) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -4942,6 +5114,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::canonicalize: return true; } @@ -5057,6 +5230,7 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, case Intrinsic::trunc: case Intrinsic::ceil: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::nearbyint: case Intrinsic::rint: { // floor (sitofp x) -> sitofp x @@ -5288,7 +5462,12 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { - Value *Callee = Call->getCalledValue(); + Value *Callee = Call->getCalledOperand(); + + // musttail calls can only be simplified if they are also DCEd. + // As we can't guarantee this here, don't simplify them. + if (Call->isMustTailCall()) + return nullptr; // call undef -> undef // call null -> undef @@ -5311,8 +5490,11 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { ConstantArgs.reserve(NumArgs); for (auto &Arg : Call->args()) { Constant *C = dyn_cast<Constant>(&Arg); - if (!C) + if (!C) { + if (isa<MetadataAsValue>(Arg.get())) + continue; return nullptr; + } ConstantArgs.push_back(C); } @@ -5320,16 +5502,16 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { } /// Given operands for a Freeze, see if we can fold the result. -static Value *SimplifyFreezeInst(Value *Op0) { +static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { // Use a utility function defined in ValueTracking. 
- if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0)) + if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.CxtI, Q.DT)) return Op0; // We have room for improvement. return nullptr; } Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { - return ::SimplifyFreezeInst(Op0); + return ::SimplifyFreezeInst(Op0, Q); } /// See if we can compute a simplified version of this instruction. @@ -5463,8 +5645,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, } case Instruction::ShuffleVector: { auto *SVI = cast<ShuffleVectorInst>(I); - Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), - SVI->getMask(), SVI->getType(), Q); + Result = + SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getShuffleMask(), SVI->getType(), Q); break; } case Instruction::PHI: @@ -5489,14 +5672,6 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, break; } - // In general, it is possible for computeKnownBits to determine all bits in a - // value even when the operands are not all constants. - if (!Result && I->getType()->isIntOrIntVectorTy()) { - KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); - if (Known.isConstant()) - Result = ConstantInt::get(I->getType(), Known.getConstant()); - } - /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index ef31c1e0ba8ce..efded17cef4e3 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" @@ -99,8 +99,8 @@ LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() { // safety of optimizing a direct call edge. for (BasicBlock &BB : *F) for (Instruction &I : BB) { - if (auto CS = CallSite(&I)) - if (Function *Callee = CS.getCalledFunction()) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *Callee = CB->getCalledFunction()) if (!Callee->isDeclaration()) if (Callees.insert(Callee).second) { Visited.insert(Callee); @@ -146,8 +146,11 @@ LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const { static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { LibFunc LF; - // Either this is a normal library function or a "vectorizable" function. - return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); + // Either this is a normal library function or a "vectorizable" + // function. Not using the VFDatabase here because this query + // is related only to libraries handled via the TLI. + return TLI.getLibFunc(F, LF) || + TLI.isKnownVectorFunctionInLibrary(F.getName()); } LazyCallGraph::LazyCallGraph( @@ -211,6 +214,15 @@ LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) updateGraphPtrs(); } +bool LazyCallGraph::invalidate(Module &, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. 
+ auto PAC = PA.getChecker<llvm::LazyCallGraphAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() || + PAC.preservedSet<CFGAnalyses>()); +} + LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { BPA = std::move(G.BPA); NodeMap = std::move(G.NodeMap); @@ -1553,6 +1565,21 @@ void LazyCallGraph::removeDeadFunction(Function &F) { // allocators. } +void LazyCallGraph::addNewFunctionIntoSCC(Function &NewF, SCC &C) { + addNodeToSCC(C, createNode(NewF)); +} + +void LazyCallGraph::addNewFunctionIntoRefSCC(Function &NewF, RefSCC &RC) { + Node &N = createNode(NewF); + + auto *C = createSCC(RC, SmallVector<Node *, 1>()); + addNodeToSCC(*C, N); + + auto Index = RC.SCCIndices.size(); + RC.SCCIndices[C] = Index; + RC.SCCs.push_back(C); +} + LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { return *new (MappedN = BPA.Allocate()) Node(*this, F); } @@ -1567,6 +1594,21 @@ void LazyCallGraph::updateGraphPtrs() { RC->G = this; } +LazyCallGraph::Node &LazyCallGraph::createNode(Function &F) { + assert(!lookup(F) && "node already exists"); + + Node &N = get(F); + NodeMap[&F] = &N; + N.DFSNumber = N.LowLink = -1; + N.populate(); + return N; +} + +void LazyCallGraph::addNodeToSCC(LazyCallGraph::SCC &C, Node &N) { + C.Nodes.push_back(&N); + SCCMap[&N] = &C; +} + template <typename RootsT, typename GetBeginT, typename GetEndT, typename GetNodeT, typename FormSCCCallbackT> void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, @@ -1788,11 +1830,12 @@ LazyCallGraphDOTPrinterPass::LazyCallGraphDOTPrinterPass(raw_ostream &OS) : OS(OS) {} static void printNodeDOT(raw_ostream &OS, LazyCallGraph::Node &N) { - std::string Name = "\"" + DOT::EscapeString(N.getFunction().getName()) + "\""; + std::string Name = + "\"" + DOT::EscapeString(std::string(N.getFunction().getName())) + "\""; for (LazyCallGraph::Edge &E : N.populate()) { OS << " " << Name << " -> \"" - << DOT::EscapeString(E.getFunction().getName()) << "\""; + << DOT::EscapeString(std::string(E.getFunction().getName())) << "\""; if (!E.isCall()) // It is a ref edge. OS << " [style=dashed,label=\"ref\"]"; OS << ";\n"; diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index bad2de9e5f5e0..f5ffa7286b3b8 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -96,9 +96,9 @@ static ValueLatticeElement intersect(const ValueLatticeElement &A, const ValueLatticeElement &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. - if (A.isUndefined()) + if (A.isUnknown()) return A; - if (B.isUndefined()) + if (B.isUnknown()) return B; // If we gave up for one, but got a useable fact from the other, use it. @@ -121,11 +121,12 @@ static ValueLatticeElement intersect(const ValueLatticeElement &A, // Intersect two constant ranges ConstantRange Range = - A.getConstantRange().intersectWith(B.getConstantRange()); - // Note: An empty range is implicitly converted to overdefined internally. - // TODO: We could instead use Undefined here since we've proven a conflict - // and thus know this path must be unreachable. - return ValueLatticeElement::getRange(std::move(Range)); + A.getConstantRange().intersectWith(B.getConstantRange()); + // Note: An empty range is implicitly converted to unknown or undef depending + // on MayIncludeUndef internally. 
+ return ValueLatticeElement::getRange( + std::move(Range), /*MayIncludeUndef=*/A.isConstantRangeIncludingUndef() | + B.isConstantRangeIncludingUndef()); } //===----------------------------------------------------------------------===// @@ -136,12 +137,9 @@ namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; struct LVIValueHandle final : public CallbackVH { - // Needs to access getValPtr(), which is protected. - friend struct DenseMapInfo<LVIValueHandle>; - LazyValueInfoCache *Parent; - LVIValueHandle(Value *V, LazyValueInfoCache *P) + LVIValueHandle(Value *V, LazyValueInfoCache *P = nullptr) : CallbackVH(V), Parent(P) { } void deleted() override; @@ -155,89 +153,77 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - /// This is all of the cached block information for exactly one Value*. - /// The entries are sorted by the BasicBlock* of the - /// entries, allowing us to do a lookup with a binary search. - /// Over-defined lattice values are recorded in OverDefinedCache to reduce - /// memory overhead. - struct ValueCacheEntryTy { - ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {} - LVIValueHandle Handle; - SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4> BlockVals; + /// This is all of the cached information for one basic block. It contains + /// the per-value lattice elements, as well as a separate set for + /// overdefined values to reduce memory usage. + struct BlockCacheEntry { + SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements; + SmallDenseSet<AssertingVH<Value>, 4> OverDefined; }; - /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. - typedef DenseMap<PoisoningVH<BasicBlock>, SmallPtrSet<Value *, 4>> - OverDefinedCacheTy; - /// Keep track of all blocks that we have ever seen, so we - /// don't spend time removing unused blocks from our caches. - DenseSet<PoisoningVH<BasicBlock> > SeenBlocks; + /// Cached information per basic block. + DenseMap<PoisoningVH<BasicBlock>, std::unique_ptr<BlockCacheEntry>> + BlockCache; + /// Set of value handles used to erase values from the cache on deletion. + DenseSet<LVIValueHandle, DenseMapInfo<Value *>> ValueHandles; + + const BlockCacheEntry *getBlockEntry(BasicBlock *BB) const { + auto It = BlockCache.find_as(BB); + if (It == BlockCache.end()) + return nullptr; + return It->second.get(); + } + + BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) { + auto It = BlockCache.find_as(BB); + if (It == BlockCache.end()) + It = BlockCache.insert({ BB, std::make_unique<BlockCacheEntry>() }) + .first; - /// This is all of the cached information for all values, - /// mapped from Value* to key information. - DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache; - OverDefinedCacheTy OverDefinedCache; + return It->second.get(); + } + void addValueHandle(Value *Val) { + auto HandleIt = ValueHandles.find_as(Val); + if (HandleIt == ValueHandles.end()) + ValueHandles.insert({ Val, this }); + } public: void insertResult(Value *Val, BasicBlock *BB, const ValueLatticeElement &Result) { - SeenBlocks.insert(BB); + BlockCacheEntry *Entry = getOrCreateBlockEntry(BB); // Insert over-defined values into their own cache to reduce memory // overhead. 
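A condensed, standalone sketch (hypothetical names; strings stand in for Value*/BasicBlock*, an int for the lattice element) of the per-block cache layout introduced above, where overdefined values live in a plain set so no lattice element has to be stored for them:

#include <cassert>
#include <map>
#include <optional>
#include <set>
#include <string>

struct BlockEntry {
  std::map<std::string, int> Lattice;   // value -> lattice element
  std::set<std::string> OverDefined;    // values known to be overdefined
};

struct Cache {
  std::map<std::string, BlockEntry> Blocks;

  void insert(const std::string &BB, const std::string &V,
              std::optional<int> LatticeVal) {
    BlockEntry &E = Blocks[BB];
    if (!LatticeVal)
      E.OverDefined.insert(V);          // overdefined: store only membership
    else
      E.Lattice[V] = *LatticeVal;
  }

  // nullopt = not cached; -1 is used here as the "overdefined" sentinel.
  std::optional<int> lookup(const std::string &BB, const std::string &V) const {
    auto BI = Blocks.find(BB);
    if (BI == Blocks.end())
      return std::nullopt;
    if (BI->second.OverDefined.count(V))
      return -1;
    auto VI = BI->second.Lattice.find(V);
    if (VI == BI->second.Lattice.end())
      return std::nullopt;
    return VI->second;
  }
};

int main() {
  Cache C;
  C.insert("entry", "x", 42);
  C.insert("entry", "y", std::nullopt);
  assert(C.lookup("entry", "x") == 42);
  assert(C.lookup("entry", "y") == -1);
  assert(!C.lookup("entry", "z"));
  return 0;
}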
if (Result.isOverdefined()) - OverDefinedCache[BB].insert(Val); - else { - auto It = ValueCache.find_as(Val); - if (It == ValueCache.end()) { - ValueCache[Val] = std::make_unique<ValueCacheEntryTy>(Val, this); - It = ValueCache.find_as(Val); - assert(It != ValueCache.end() && "Val was just added to the map!"); - } - It->second->BlockVals[BB] = Result; - } - } - - bool isOverdefined(Value *V, BasicBlock *BB) const { - auto ODI = OverDefinedCache.find(BB); - - if (ODI == OverDefinedCache.end()) - return false; + Entry->OverDefined.insert(Val); + else + Entry->LatticeElements.insert({ Val, Result }); - return ODI->second.count(V); + addValueHandle(Val); } - bool hasCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) - return true; - - auto I = ValueCache.find_as(V); - if (I == ValueCache.end()) - return false; - - return I->second->BlockVals.count(BB); - } + Optional<ValueLatticeElement> getCachedValueInfo(Value *V, + BasicBlock *BB) const { + const BlockCacheEntry *Entry = getBlockEntry(BB); + if (!Entry) + return None; - ValueLatticeElement getCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) + if (Entry->OverDefined.count(V)) return ValueLatticeElement::getOverdefined(); - auto I = ValueCache.find_as(V); - if (I == ValueCache.end()) - return ValueLatticeElement(); - auto BBI = I->second->BlockVals.find(BB); - if (BBI == I->second->BlockVals.end()) - return ValueLatticeElement(); - return BBI->second; + auto LatticeIt = Entry->LatticeElements.find_as(V); + if (LatticeIt == Entry->LatticeElements.end()) + return None; + + return LatticeIt->second; } /// clear - Empty the cache. void clear() { - SeenBlocks.clear(); - ValueCache.clear(); - OverDefinedCache.clear(); + BlockCache.clear(); + ValueHandles.clear(); } /// Inform the cache that a given value has been deleted. @@ -251,23 +237,18 @@ namespace { /// OldSucc might have (unless also overdefined in NewSucc). This just /// flushes elements from the cache and does not add any. void threadEdgeImpl(BasicBlock *OldSucc,BasicBlock *NewSucc); - - friend struct LVIValueHandle; }; } void LazyValueInfoCache::eraseValue(Value *V) { - for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { - // Copy and increment the iterator immediately so we can erase behind - // ourselves. - auto Iter = I++; - SmallPtrSetImpl<Value *> &ValueSet = Iter->second; - ValueSet.erase(V); - if (ValueSet.empty()) - OverDefinedCache.erase(Iter); + for (auto &Pair : BlockCache) { + Pair.second->LatticeElements.erase(V); + Pair.second->OverDefined.erase(V); } - ValueCache.erase(V); + auto HandleIt = ValueHandles.find_as(V); + if (HandleIt != ValueHandles.end()) + ValueHandles.erase(HandleIt); } void LVIValueHandle::deleted() { @@ -277,18 +258,7 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { - // Shortcut if we have never seen this block. 
- DenseSet<PoisoningVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); - if (I == SeenBlocks.end()) - return; - SeenBlocks.erase(I); - - auto ODI = OverDefinedCache.find(BB); - if (ODI != OverDefinedCache.end()) - OverDefinedCache.erase(ODI); - - for (auto &I : ValueCache) - I.second->BlockVals.erase(BB); + BlockCache.erase(BB); } void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, @@ -306,10 +276,11 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, std::vector<BasicBlock*> worklist; worklist.push_back(OldSucc); - auto I = OverDefinedCache.find(OldSucc); - if (I == OverDefinedCache.end()) + const BlockCacheEntry *Entry = getBlockEntry(OldSucc); + if (!Entry || Entry->OverDefined.empty()) return; // Nothing to process here. - SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end()); + SmallVector<Value *, 4> ValsToClear(Entry->OverDefined.begin(), + Entry->OverDefined.end()); // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already @@ -323,10 +294,10 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, if (ToUpdate == NewSucc) continue; // If a value was marked overdefined in OldSucc, and is here too... - auto OI = OverDefinedCache.find(ToUpdate); - if (OI == OverDefinedCache.end()) + auto OI = BlockCache.find_as(ToUpdate); + if (OI == BlockCache.end() || OI->second->OverDefined.empty()) continue; - SmallPtrSetImpl<Value *> &ValueSet = OI->second; + auto &ValueSet = OI->second->OverDefined; bool changed = false; for (Value *V : ValsToClear) { @@ -336,11 +307,6 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, // If we removed anything, then we potentially need to update // blocks successors too. changed = true; - - if (ValueSet.empty()) { - OverDefinedCache.erase(OI); - break; - } } if (!changed) continue; @@ -357,156 +323,137 @@ class LazyValueInfoImpl; class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { LazyValueInfoImpl *LVIImpl; // While analyzing which blocks we can solve values for, we need the dominator - // information. Since this is an optional parameter in LVI, we require this - // DomTreeAnalysis pass in the printer pass, and pass the dominator - // tree to the LazyValueInfoAnnotatedWriter. + // information. DominatorTree &DT; public: LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree) : LVIImpl(L), DT(DTree) {} - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS); + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override; - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS); + void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override; }; } namespace { - // The actual implementation of the lazy analysis and update. Note that the - // inheritance from LazyValueInfoCache is intended to be temporary while - // splitting the code and then transitioning to a has-a relationship. - class LazyValueInfoImpl { - - /// Cached results from previous queries - LazyValueInfoCache TheCache; - - /// This stack holds the state of the value solver during a query. - /// It basically emulates the callstack of the naive - /// recursive value lookup process. - SmallVector<std::pair<BasicBlock*, Value*>, 8> BlockValueStack; - - /// Keeps track of which block-value pairs are in BlockValueStack. 
- DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet; - - /// Push BV onto BlockValueStack unless it's already in there. - /// Returns true on success. - bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) { - if (!BlockValueSet.insert(BV).second) - return false; // It's already in the stack. - - LLVM_DEBUG(dbgs() << "PUSH: " << *BV.second << " in " - << BV.first->getName() << "\n"); - BlockValueStack.push_back(BV); - return true; - } +// The actual implementation of the lazy analysis and update. Note that the +// inheritance from LazyValueInfoCache is intended to be temporary while +// splitting the code and then transitioning to a has-a relationship. +class LazyValueInfoImpl { + + /// Cached results from previous queries + LazyValueInfoCache TheCache; + + /// This stack holds the state of the value solver during a query. + /// It basically emulates the callstack of the naive + /// recursive value lookup process. + SmallVector<std::pair<BasicBlock*, Value*>, 8> BlockValueStack; + + /// Keeps track of which block-value pairs are in BlockValueStack. + DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet; + + /// Push BV onto BlockValueStack unless it's already in there. + /// Returns true on success. + bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) { + if (!BlockValueSet.insert(BV).second) + return false; // It's already in the stack. + + LLVM_DEBUG(dbgs() << "PUSH: " << *BV.second << " in " + << BV.first->getName() << "\n"); + BlockValueStack.push_back(BV); + return true; + } + + AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. + const DataLayout &DL; ///< A mandatory DataLayout - AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. - const DataLayout &DL; ///< A mandatory DataLayout - DominatorTree *DT; ///< An optional DT pointer. - DominatorTree *DisabledDT; ///< Stores DT if it's disabled. + /// Declaration of the llvm.experimental.guard() intrinsic, + /// if it exists in the module. + Function *GuardDecl; - ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB); - bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, - ValueLatticeElement &Result, Instruction *CxtI = nullptr); - bool hasBlockValue(Value *Val, BasicBlock *BB); + Optional<ValueLatticeElement> getBlockValue(Value *Val, BasicBlock *BB); + Optional<ValueLatticeElement> getEdgeValue(Value *V, BasicBlock *F, + BasicBlock *T, Instruction *CxtI = nullptr); // These methods process one work item and may add more. A false value // returned means that the work item was not completely processed and must // be revisited after going through the new items. 
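A toy, standalone model (hypothetical, not the LVI code) of the control flow these Optional-returning solvers implement: a step either finishes and caches its result, or pushes its unresolved dependency on the work stack and signals "revisit me later" with std::nullopt:

#include <cstdio>
#include <map>
#include <optional>
#include <vector>

static std::map<int, int> Cache;

static std::optional<int> trySolve(int Item, std::vector<int> &Stack) {
  if (Item == 0)
    return 0;                          // base case: immediately known
  auto It = Cache.find(Item - 1);
  if (It == Cache.end()) {             // dependency not solved yet:
    Stack.push_back(Item - 1);         // push it and revisit this item later
    return std::nullopt;
  }
  return It->second + 1;               // dependency available: finish this item
}

int main() {
  std::vector<int> Stack{3};
  while (!Stack.empty()) {
    int Item = Stack.back();
    if (std::optional<int> R = trySolve(Item, Stack)) {
      Cache[Item] = *R;                // completed: cache the result and pop
      Stack.pop_back();
      std::printf("solved %d -> %d\n", Item, *R);
    }
  }
  return 0;
}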
bool solveBlockValue(Value *Val, BasicBlock *BB); - bool solveBlockValueImpl(ValueLatticeElement &Res, Value *Val, - BasicBlock *BB); - bool solveBlockValueNonLocal(ValueLatticeElement &BBLV, Value *Val, - BasicBlock *BB); - bool solveBlockValuePHINode(ValueLatticeElement &BBLV, PHINode *PN, - BasicBlock *BB); - bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S, - BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueImpl(Value *Val, BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueNonLocal(Value *Val, + BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValuePHINode(PHINode *PN, + BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueSelect(SelectInst *S, + BasicBlock *BB); Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I, BasicBlock *BB); - bool solveBlockValueBinaryOpImpl( - ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB, + Optional<ValueLatticeElement> solveBlockValueBinaryOpImpl( + Instruction *I, BasicBlock *BB, std::function<ConstantRange(const ConstantRange &, const ConstantRange &)> OpFn); - bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI, - BasicBlock *BB); - bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI, - BasicBlock *BB); - bool solveBlockValueOverflowIntrinsic( - ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); - bool solveBlockValueSaturatingIntrinsic(ValueLatticeElement &BBLV, - SaturatingInst *SI, BasicBlock *BB); - bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, - BasicBlock *BB); - bool solveBlockValueExtractValue(ValueLatticeElement &BBLV, - ExtractValueInst *EVI, BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueBinaryOp(BinaryOperator *BBI, + BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueCast(CastInst *CI, + BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueOverflowIntrinsic( + WithOverflowInst *WO, BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueSaturatingIntrinsic( + SaturatingInst *SI, BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II, + BasicBlock *BB); + Optional<ValueLatticeElement> solveBlockValueExtractValue( + ExtractValueInst *EVI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); void solve(); - public: - /// This is the query interface to determine the lattice - /// value for the specified Value* at the end of the specified block. - ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB, - Instruction *CxtI = nullptr); - - /// This is the query interface to determine the lattice - /// value for the specified Value* at the specified instruction (generally - /// from an assume intrinsic). - ValueLatticeElement getValueAt(Value *V, Instruction *CxtI); - - /// This is the query interface to determine the lattice - /// value for the specified Value* that is true on the specified edge. - ValueLatticeElement getValueOnEdge(Value *V, BasicBlock *FromBB, - BasicBlock *ToBB, - Instruction *CxtI = nullptr); - - /// Complete flush all previously computed values - void clear() { - TheCache.clear(); - } - - /// Printing the LazyValueInfo Analysis. - void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { - LazyValueInfoAnnotatedWriter Writer(this, DTree); - F.print(OS, &Writer); - } - - /// This is part of the update interface to inform the cache - /// that a block has been deleted. 
- void eraseBlock(BasicBlock *BB) { - TheCache.eraseBlock(BB); - } +public: + /// This is the query interface to determine the lattice + /// value for the specified Value* at the end of the specified block. + ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB, + Instruction *CxtI = nullptr); + + /// This is the query interface to determine the lattice + /// value for the specified Value* at the specified instruction (generally + /// from an assume intrinsic). + ValueLatticeElement getValueAt(Value *V, Instruction *CxtI); + + /// This is the query interface to determine the lattice + /// value for the specified Value* that is true on the specified edge. + ValueLatticeElement getValueOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI = nullptr); + + /// Complete flush all previously computed values + void clear() { + TheCache.clear(); + } - /// Disables use of the DominatorTree within LVI. - void disableDT() { - if (DT) { - assert(!DisabledDT && "Both DT and DisabledDT are not nullptr!"); - std::swap(DT, DisabledDT); - } - } + /// Printing the LazyValueInfo Analysis. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { + LazyValueInfoAnnotatedWriter Writer(this, DTree); + F.print(OS, &Writer); + } - /// Enables use of the DominatorTree within LVI. Does nothing if the class - /// instance was initialized without a DT pointer. - void enableDT() { - if (DisabledDT) { - assert(!DT && "Both DT and DisabledDT are not nullptr!"); - std::swap(DT, DisabledDT); - } - } + /// This is part of the update interface to inform the cache + /// that a block has been deleted. + void eraseBlock(BasicBlock *BB) { + TheCache.eraseBlock(BB); + } - /// This is the update interface to inform the cache that an edge from - /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. - void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + /// This is the update interface to inform the cache that an edge from + /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. + void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL, - DominatorTree *DT = nullptr) - : AC(AC), DL(DL), DT(DT), DisabledDT(nullptr) {} - }; + LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL, + Function *GuardDecl) + : AC(AC), DL(DL), GuardDecl(GuardDecl) {} +}; } // end anonymous namespace @@ -545,12 +492,14 @@ void LazyValueInfoImpl::solve() { if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. assert(BlockValueStack.back() == e && "Nothing should have been pushed!"); - assert(TheCache.hasCachedValueInfo(e.second, e.first) && - "Result should be in cache!"); - +#ifndef NDEBUG + Optional<ValueLatticeElement> BBLV = + TheCache.getCachedValueInfo(e.second, e.first); + assert(BBLV && "Result should be in cache!"); LLVM_DEBUG( dbgs() << "POP " << *e.second << " in " << e.first->getName() << " = " - << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); + << *BBLV << "\n"); +#endif BlockValueStack.pop_back(); BlockValueSet.erase(e); @@ -561,21 +510,22 @@ void LazyValueInfoImpl::solve() { } } -bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) { - // If already a constant, there is nothing to compute. 
- if (isa<Constant>(Val)) - return true; - - return TheCache.hasCachedValueInfo(Val, BB); -} - -ValueLatticeElement LazyValueInfoImpl::getBlockValue(Value *Val, - BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::getBlockValue(Value *Val, + BasicBlock *BB) { // If already a constant, there is nothing to compute. if (Constant *VC = dyn_cast<Constant>(Val)) return ValueLatticeElement::get(VC); - return TheCache.getCachedValueInfo(Val, BB); + if (Optional<ValueLatticeElement> OptLatticeVal = + TheCache.getCachedValueInfo(Val, BB)) + return OptLatticeVal; + + // We have hit a cycle, assume overdefined. + if (!pushBlockValue({ BB, Val })) + return ValueLatticeElement::getOverdefined(); + + // Yet to be resolved. + return None; } static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { @@ -596,43 +546,32 @@ static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { } bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { - if (isa<Constant>(Val)) - return true; - - if (TheCache.hasCachedValueInfo(Val, BB)) { - // If we have a cached value, use that. - LLVM_DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" - << TheCache.getCachedValueInfo(Val, BB) << '\n'); - - // Since we're reusing a cached value, we don't need to update the - // OverDefinedCache. The cache will have been properly updated whenever the - // cached value was inserted. - return true; - } + assert(!isa<Constant>(Val) && "Value should not be constant"); + assert(!TheCache.getCachedValueInfo(Val, BB) && + "Value should not be in cache"); // Hold off inserting this value into the Cache in case we have to return // false and come back later. - ValueLatticeElement Res; - if (!solveBlockValueImpl(Res, Val, BB)) + Optional<ValueLatticeElement> Res = solveBlockValueImpl(Val, BB); + if (!Res) // Work pushed, will revisit return false; - TheCache.insertResult(Val, BB, Res); + TheCache.insertResult(Val, BB, *Res); return true; } -bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, - Value *Val, BasicBlock *BB) { - +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueImpl( + Value *Val, BasicBlock *BB) { Instruction *BBI = dyn_cast<Instruction>(Val); if (!BBI || BBI->getParent() != BB) - return solveBlockValueNonLocal(Res, Val, BB); + return solveBlockValueNonLocal(Val, BB); if (PHINode *PN = dyn_cast<PHINode>(BBI)) - return solveBlockValuePHINode(Res, PN, BB); + return solveBlockValuePHINode(PN, BB); if (auto *SI = dyn_cast<SelectInst>(BBI)) - return solveBlockValueSelect(Res, SI, BB); + return solveBlockValueSelect(SI, BB); // If this value is a nonnull pointer, record it's range and bailout. Note // that for all other pointer typed values, we terminate the search at the @@ -644,28 +583,26 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, // instruction is placed, even if it could legally be hoisted much higher. // That is unfortunate. 
PointerType *PT = dyn_cast<PointerType>(BBI->getType()); - if (PT && isKnownNonZero(BBI, DL)) { - Res = ValueLatticeElement::getNot(ConstantPointerNull::get(PT)); - return true; - } + if (PT && isKnownNonZero(BBI, DL)) + return ValueLatticeElement::getNot(ConstantPointerNull::get(PT)); + if (BBI->getType()->isIntegerTy()) { if (auto *CI = dyn_cast<CastInst>(BBI)) - return solveBlockValueCast(Res, CI, BB); + return solveBlockValueCast(CI, BB); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI)) - return solveBlockValueBinaryOp(Res, BO, BB); + return solveBlockValueBinaryOp(BO, BB); if (auto *EVI = dyn_cast<ExtractValueInst>(BBI)) - return solveBlockValueExtractValue(Res, EVI, BB); + return solveBlockValueExtractValue(EVI, BB); if (auto *II = dyn_cast<IntrinsicInst>(BBI)) - return solveBlockValueIntrinsic(Res, II, BB); + return solveBlockValueIntrinsic(II, BB); } LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - unknown inst def found.\n"); - Res = getFromRangeMetadata(BBI); - return true; + return getFromRangeMetadata(BBI); } static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { @@ -717,8 +654,8 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) { return false; } -bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, - Value *Val, BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueNonLocal( + Value *Val, BasicBlock *BB) { ValueLatticeElement Result; // Start Undefined. // If this is the entry block, we must be asking about an argument. The @@ -731,13 +668,10 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, if (PTy && (isKnownNonZero(Val, DL) || (isObjectDereferencedInBlock(Val, BB) && - !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())))) { - Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); - } else { - Result = ValueLatticeElement::getOverdefined(); - } - BBLV = Result; - return true; + !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())))) + return ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); + else + return ValueLatticeElement::getOverdefined(); } // Loop over all of our predecessors, merging what we know from them into @@ -750,12 +684,12 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, // canonicalizing to make this true rather than relying on this happy // accident. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - ValueLatticeElement EdgeResult; - if (!getEdgeValue(Val, *PI, BB, EdgeResult)) + Optional<ValueLatticeElement> EdgeResult = getEdgeValue(Val, *PI, BB); + if (!EdgeResult) // Explore that input, then return here - return false; + return None; - Result.mergeIn(EdgeResult, DL); + Result.mergeIn(*EdgeResult); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. @@ -770,19 +704,17 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } - BBLV = Result; - return true; + return Result; } } // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined()); - BBLV = Result; - return true; + return Result; } -bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV, - PHINode *PN, BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValuePHINode( + PHINode *PN, BasicBlock *BB) { ValueLatticeElement Result; // Start Undefined. 
// Loop over all of our predecessors, merging what we know from them into @@ -791,15 +723,16 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); - ValueLatticeElement EdgeResult; // Note that we can provide PN as the context value to getEdgeValue, even // though the results will be cached, because PN is the value being used as // the cache key in the caller. - if (!getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN)) + Optional<ValueLatticeElement> EdgeResult = + getEdgeValue(PhiVal, PhiBB, BB, PN); + if (!EdgeResult) // Explore that input, then return here - return false; + return None; - Result.mergeIn(EdgeResult, DL); + Result.mergeIn(*EdgeResult); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. @@ -807,15 +740,13 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV, LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred (local).\n"); - BBLV = Result; - return true; + return Result; } } // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined() && "Possible PHI in entry block?"); - BBLV = Result; - return true; + return Result; } static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, @@ -829,63 +760,59 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (!BBI) return; + BasicBlock *BB = BBI->getParent(); for (auto &AssumeVH : AC->assumptionsFor(Val)) { if (!AssumeVH) continue; + + // Only check assumes in the block of the context instruction. Other + // assumes will have already been taken into account when the value was + // propagated from predecessor blocks. auto *I = cast<CallInst>(AssumeVH); - if (!isValidAssumeForContext(I, BBI, DT)) + if (I->getParent() != BB || !isValidAssumeForContext(I, BBI)) continue; BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); } // If guards are not used in the module, don't spend time looking for them - auto *GuardDecl = BBI->getModule()->getFunction( - Intrinsic::getName(Intrinsic::experimental_guard)); if (!GuardDecl || GuardDecl->use_empty()) return; - if (BBI->getIterator() == BBI->getParent()->begin()) + if (BBI->getIterator() == BB->begin()) return; for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), - BBI->getParent()->rend())) { + BB->rend())) { Value *Cond = nullptr; if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); } } -bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, - SelectInst *SI, BasicBlock *BB) { - +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect( + SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed - if (!hasBlockValue(SI->getTrueValue(), BB)) { - if (pushBlockValue(std::make_pair(BB, SI->getTrueValue()))) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - ValueLatticeElement TrueVal = getBlockValue(SI->getTrueValue(), BB); + Optional<ValueLatticeElement> OptTrueVal = + getBlockValue(SI->getTrueValue(), BB); + if (!OptTrueVal) + return None; + ValueLatticeElement &TrueVal = *OptTrueVal; + // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. 
- if (TrueVal.isOverdefined()) { - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } + if (TrueVal.isOverdefined()) + return ValueLatticeElement::getOverdefined(); + + Optional<ValueLatticeElement> OptFalseVal = + getBlockValue(SI->getFalseValue(), BB); + if (!OptFalseVal) + return None; + ValueLatticeElement &FalseVal = *OptFalseVal; - if (!hasBlockValue(SI->getFalseValue(), BB)) { - if (pushBlockValue(std::make_pair(BB, SI->getFalseValue()))) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - ValueLatticeElement FalseVal = getBlockValue(SI->getFalseValue(), BB); // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. - if (FalseVal.isOverdefined()) { - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } + if (FalseVal.isOverdefined()) + return ValueLatticeElement::getOverdefined(); if (TrueVal.isConstantRange() && FalseVal.isConstantRange()) { const ConstantRange &TrueCR = TrueVal.getConstantRange(); @@ -911,31 +838,28 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, return TrueCR.umax(FalseCR); }; }(); - BBLV = ValueLatticeElement::getRange(ResultCR); - return true; + return ValueLatticeElement::getRange( + ResultCR, TrueVal.isConstantRangeIncludingUndef() | + FalseVal.isConstantRangeIncludingUndef()); } if (SPR.Flavor == SPF_ABS) { - if (LHS == SI->getTrueValue()) { - BBLV = ValueLatticeElement::getRange(TrueCR.abs()); - return true; - } - if (LHS == SI->getFalseValue()) { - BBLV = ValueLatticeElement::getRange(FalseCR.abs()); - return true; - } + if (LHS == SI->getTrueValue()) + return ValueLatticeElement::getRange( + TrueCR.abs(), TrueVal.isConstantRangeIncludingUndef()); + if (LHS == SI->getFalseValue()) + return ValueLatticeElement::getRange( + FalseCR.abs(), FalseVal.isConstantRangeIncludingUndef()); } if (SPR.Flavor == SPF_NABS) { ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth())); - if (LHS == SI->getTrueValue()) { - BBLV = ValueLatticeElement::getRange(Zero.sub(TrueCR.abs())); - return true; - } - if (LHS == SI->getFalseValue()) { - BBLV = ValueLatticeElement::getRange(Zero.sub(FalseCR.abs())); - return true; - } + if (LHS == SI->getTrueValue()) + return ValueLatticeElement::getRange( + Zero.sub(TrueCR.abs()), FalseVal.isConstantRangeIncludingUndef()); + if (LHS == SI->getFalseValue()) + return ValueLatticeElement::getRange( + Zero.sub(FalseCR.abs()), FalseVal.isConstantRangeIncludingUndef()); } } @@ -990,41 +914,34 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, } } - ValueLatticeElement Result; // Start Undefined. 
- Result.mergeIn(TrueVal, DL); - Result.mergeIn(FalseVal, DL); - BBLV = Result; - return true; + ValueLatticeElement Result = TrueVal; + Result.mergeIn(FalseVal); + return Result; } Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op, Instruction *I, BasicBlock *BB) { - if (!hasBlockValue(I->getOperand(Op), BB)) - if (pushBlockValue(std::make_pair(BB, I->getOperand(Op)))) - return None; + Optional<ValueLatticeElement> OptVal = getBlockValue(I->getOperand(Op), BB); + if (!OptVal) + return None; + + ValueLatticeElement &Val = *OptVal; + intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I); + if (Val.isConstantRange()) + return Val.getConstantRange(); const unsigned OperandBitWidth = DL.getTypeSizeInBits(I->getOperand(Op)->getType()); - ConstantRange Range = ConstantRange::getFull(OperandBitWidth); - if (hasBlockValue(I->getOperand(Op), BB)) { - ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB); - intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I); - if (Val.isConstantRange()) - Range = Val.getConstantRange(); - } - return Range; + return ConstantRange::getFull(OperandBitWidth); } -bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, - CastInst *CI, - BasicBlock *BB) { - if (!CI->getOperand(0)->getType()->isSized()) { - // Without knowing how wide the input is, we can't analyze it in any useful - // way. - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast( + CastInst *CI, BasicBlock *BB) { + // Without knowing how wide the input is, we can't analyze it in any useful + // way. + if (!CI->getOperand(0)->getType()->isSized()) + return ValueLatticeElement::getOverdefined(); // Filter out casts we don't know how to reason about before attempting to // recurse on our operand. This can cut a long search short if we know we're @@ -1039,8 +956,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, // Unhandled instructions are overdefined. LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown cast).\n"); - BBLV = ValueLatticeElement::getOverdefined(); - return true; + return ValueLatticeElement::getOverdefined(); } // Figure out the range of the LHS. If that fails, we still apply the @@ -1049,21 +965,20 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, Optional<ConstantRange> LHSRes = getRangeForOperand(0, CI, BB); if (!LHSRes.hasValue()) // More work to do before applying this transfer rule. - return false; - ConstantRange LHSRange = LHSRes.getValue(); + return None; + const ConstantRange &LHSRange = LHSRes.getValue(); const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - BBLV = ValueLatticeElement::getRange(LHSRange.castOp(CI->getOpcode(), + return ValueLatticeElement::getRange(LHSRange.castOp(CI->getOpcode(), ResultBitWidth)); - return true; } -bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl( - ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB, +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl( + Instruction *I, BasicBlock *BB, std::function<ConstantRange(const ConstantRange &, const ConstantRange &)> OpFn) { // Figure out the ranges of the operands. 
If that fails, use a @@ -1074,26 +989,22 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl( Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB); if (!LHSRes.hasValue() || !RHSRes.hasValue()) // More work to do before applying this transfer rule. - return false; + return None; - ConstantRange LHSRange = LHSRes.getValue(); - ConstantRange RHSRange = RHSRes.getValue(); - BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); - return true; + const ConstantRange &LHSRange = LHSRes.getValue(); + const ConstantRange &RHSRange = RHSRes.getValue(); + return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); } -bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, - BinaryOperator *BO, - BasicBlock *BB) { - +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOp( + BinaryOperator *BO, BasicBlock *BB) { assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); if (BO->getOpcode() == Instruction::Xor) { // Xor is the only operation not supported by ConstantRange::binaryOp(). LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown binary operator).\n"); - BBLV = ValueLatticeElement::getOverdefined(); - return true; + return ValueLatticeElement::getOverdefined(); } if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) { @@ -1104,47 +1015,49 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; return solveBlockValueBinaryOpImpl( - BBLV, BO, BB, + BO, BB, [BO, NoWrapKind](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.overflowingBinaryOp(BO->getOpcode(), CR2, NoWrapKind); }); } return solveBlockValueBinaryOpImpl( - BBLV, BO, BB, [BO](const ConstantRange &CR1, const ConstantRange &CR2) { + BO, BB, [BO](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.binaryOp(BO->getOpcode(), CR2); }); } -bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic( - ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) { - return solveBlockValueBinaryOpImpl(BBLV, WO, BB, - [WO](const ConstantRange &CR1, const ConstantRange &CR2) { +Optional<ValueLatticeElement> +LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(WithOverflowInst *WO, + BasicBlock *BB) { + return solveBlockValueBinaryOpImpl( + WO, BB, [WO](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.binaryOp(WO->getBinaryOp(), CR2); }); } -bool LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic( - ValueLatticeElement &BBLV, SaturatingInst *SI, BasicBlock *BB) { +Optional<ValueLatticeElement> +LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic(SaturatingInst *SI, + BasicBlock *BB) { switch (SI->getIntrinsicID()) { case Intrinsic::uadd_sat: return solveBlockValueBinaryOpImpl( - BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { + SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.uadd_sat(CR2); }); case Intrinsic::usub_sat: return solveBlockValueBinaryOpImpl( - BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { + SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.usub_sat(CR2); }); case Intrinsic::sadd_sat: return solveBlockValueBinaryOpImpl( - BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { + SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.sadd_sat(CR2); }); case Intrinsic::ssub_sat: return solveBlockValueBinaryOpImpl( - BBLV, SI, BB, [](const 
ConstantRange &CR1, const ConstantRange &CR2) { + SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.ssub_sat(CR2); }); default: @@ -1152,58 +1065,71 @@ bool LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic( } } -bool LazyValueInfoImpl::solveBlockValueIntrinsic(ValueLatticeElement &BBLV, - IntrinsicInst *II, - BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueIntrinsic( + IntrinsicInst *II, BasicBlock *BB) { if (auto *SI = dyn_cast<SaturatingInst>(II)) - return solveBlockValueSaturatingIntrinsic(BBLV, SI, BB); + return solveBlockValueSaturatingIntrinsic(SI, BB); LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown intrinsic).\n"); - BBLV = ValueLatticeElement::getOverdefined(); - return true; + return ValueLatticeElement::getOverdefined(); } -bool LazyValueInfoImpl::solveBlockValueExtractValue( - ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueExtractValue( + ExtractValueInst *EVI, BasicBlock *BB) { if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) - return solveBlockValueOverflowIntrinsic(BBLV, WO, BB); + return solveBlockValueOverflowIntrinsic(WO, BB); // Handle extractvalue of insertvalue to allow further simplification // based on replaced with.overflow intrinsics. if (Value *V = SimplifyExtractValueInst( EVI->getAggregateOperand(), EVI->getIndices(), - EVI->getModule()->getDataLayout())) { - if (!hasBlockValue(V, BB)) { - if (pushBlockValue({ BB, V })) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - BBLV = getBlockValue(V, BB); - return true; - } + EVI->getModule()->getDataLayout())) + return getBlockValue(V, BB); LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown extractvalue).\n"); - BBLV = ValueLatticeElement::getOverdefined(); - return true; + return ValueLatticeElement::getOverdefined(); +} + +static bool matchICmpOperand(const APInt *&Offset, Value *LHS, Value *Val, + ICmpInst::Predicate Pred) { + if (LHS == Val) + return true; + + // Handle range checking idiom produced by InstCombine. We will subtract the + // offset from the allowed range for RHS in this case. + if (match(LHS, m_Add(m_Specific(Val), m_APInt(Offset)))) + return true; + + // If (x | y) < C, then (x < C) && (y < C). + if (match(LHS, m_c_Or(m_Specific(Val), m_Value())) && + (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)) + return true; + + // If (x & y) > C, then (x > C) && (y > C). + if (match(LHS, m_c_And(m_Specific(Val), m_Value())) && + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)) + return true; + + return false; } static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest) { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - CmpInst::Predicate Predicate = ICI->getPredicate(); + + // Get the predicate that must hold along the considered edge. + CmpInst::Predicate EdgePred = + isTrueDest ? ICI->getPredicate() : ICI->getInversePredicate(); if (isa<Constant>(RHS)) { if (ICI->isEquality() && LHS == Val) { - // We know that V has the RHS constant if this is a true SETEQ or - // false SETNE. 
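// Illustrative sketch, not part of the patch above. Every binary operator and
// saturating intrinsic now funnels through solveBlockValueBinaryOpImpl with a
// small lambda that combines the two operand ranges, so the "fetch both
// operand ranges, then apply" skeleton is written once. Standalone example of
// that shape using ConstantRange::binaryOp; combineRanges and the constants
// are invented for the example.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include <functional>

using namespace llvm;

static ConstantRange
combineRanges(const ConstantRange &LHS, const ConstantRange &RHS,
              std::function<ConstantRange(const ConstantRange &,
                                          const ConstantRange &)> OpFn) {
  return OpFn(LHS, RHS);
}

static ConstantRange exampleAddRange() {
  ConstantRange A(APInt(32, 0), APInt(32, 10)); // the values 0..9
  ConstantRange B(APInt(32, 5), APInt(32, 7));  // the values 5..6
  return combineRanges(A, B,
                       [](const ConstantRange &CR1, const ConstantRange &CR2) {
                         return CR1.binaryOp(Instruction::Add, CR2);
                       }); // yields [5, 16), i.e. the values 5..15
}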
- if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ)) + if (EdgePred == ICmpInst::ICMP_EQ) return ValueLatticeElement::get(cast<Constant>(RHS)); - else + else if (!isa<UndefValue>(RHS)) return ValueLatticeElement::getNot(cast<Constant>(RHS)); } } @@ -1211,47 +1137,31 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, if (!Val->getType()->isIntegerTy()) return ValueLatticeElement::getOverdefined(); - // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible - // range of Val guaranteed by the condition. Recognize comparisons in the from - // of: - // icmp <pred> Val, ... - // icmp <pred> (add Val, Offset), ... - // The latter is the range checking idiom that InstCombine produces. Subtract - // the offset from the allowed range for RHS in this case. - - // Val or (add Val, Offset) can be on either hand of the comparison - if (LHS != Val && !match(LHS, m_Add(m_Specific(Val), m_ConstantInt()))) { + const APInt *Offset = nullptr; + if (!matchICmpOperand(Offset, LHS, Val, EdgePred)) { std::swap(LHS, RHS); - Predicate = CmpInst::getSwappedPredicate(Predicate); + EdgePred = CmpInst::getSwappedPredicate(EdgePred); + if (!matchICmpOperand(Offset, LHS, Val, EdgePred)) + return ValueLatticeElement::getOverdefined(); } - ConstantInt *Offset = nullptr; - if (LHS != Val) - match(LHS, m_Add(m_Specific(Val), m_ConstantInt(Offset))); - - if (LHS == Val || Offset) { - // Calculate the range of values that are allowed by the comparison - ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), - /*isFullSet=*/true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) - RHSRange = ConstantRange(CI->getValue()); - else if (Instruction *I = dyn_cast<Instruction>(RHS)) - if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) - RHSRange = getConstantRangeFromMetadata(*Ranges); - - // If we're interested in the false dest, invert the condition - CmpInst::Predicate Pred = - isTrueDest ? Predicate : CmpInst::getInversePredicate(Predicate); - ConstantRange TrueValues = - ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); - - if (Offset) // Apply the offset from above. - TrueValues = TrueValues.subtract(Offset->getValue()); - - return ValueLatticeElement::getRange(std::move(TrueValues)); - } + // Calculate the range of values that are allowed by the comparison. + ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), + /*isFullSet=*/true); + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) + RHSRange = ConstantRange(CI->getValue()); + else if (Instruction *I = dyn_cast<Instruction>(RHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + RHSRange = getConstantRangeFromMetadata(*Ranges); - return ValueLatticeElement::getOverdefined(); + // If we're interested in the false dest, invert the condition + ConstantRange TrueValues = + ConstantRange::makeAllowedICmpRegion(EdgePred, RHSRange); + + if (Offset) // Apply the offset from above. 
+ TrueValues = TrueValues.subtract(*Offset); + + return ValueLatticeElement::getRange(std::move(TrueValues)); } // Handle conditions of the form @@ -1278,11 +1188,11 @@ static ValueLatticeElement getValueFromOverflowCondition( static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, ValueLatticeElement> &Visited); + SmallDenseMap<Value*, ValueLatticeElement> &Visited); static ValueLatticeElement getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, ValueLatticeElement> &Visited) { + SmallDenseMap<Value*, ValueLatticeElement> &Visited) { if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) return getValueFromICmpCondition(Val, ICI, isTrueDest); @@ -1315,7 +1225,7 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, ValueLatticeElement> &Visited) { + SmallDenseMap<Value*, ValueLatticeElement> &Visited) { auto I = Visited.find(Cond); if (I != Visited.end()) return I->second; @@ -1328,7 +1238,7 @@ getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) { assert(Cond && "precondition"); - DenseMap<Value*, ValueLatticeElement> Visited; + SmallDenseMap<Value*, ValueLatticeElement> Visited; return getValueFromCondition(Val, Cond, isTrueDest, Visited); } @@ -1380,8 +1290,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, /// Compute the value of Val on the edge BBFrom -> BBTo. Returns false if /// Val is not constrained on the edge. Result is unspecified if return value /// is false. -static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, ValueLatticeElement &Result) { +static Optional<ValueLatticeElement> getEdgeValueLocal(Value *Val, + BasicBlock *BBFrom, + BasicBlock *BBTo) { // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -1396,17 +1307,16 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If V is the condition of the branch itself, then we know exactly what // it is. - if (Condition == Val) { - Result = ValueLatticeElement::get(ConstantInt::get( + if (Condition == Val) + return ValueLatticeElement::get(ConstantInt::get( Type::getInt1Ty(Val->getContext()), isTrueDest)); - return true; - } // If the condition of the branch is an equality comparison, we may be // able to infer the value. - Result = getValueFromCondition(Val, Condition, isTrueDest); + ValueLatticeElement Result = getValueFromCondition(Val, Condition, + isTrueDest); if (!Result.isOverdefined()) - return true; + return Result; if (User *Usr = dyn_cast<User>(Val)) { assert(Result.isOverdefined() && "Result isn't overdefined"); @@ -1446,7 +1356,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, } } if (!Result.isOverdefined()) - return true; + return Result; } } @@ -1455,7 +1365,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { Value *Condition = SI->getCondition(); if (!isa<IntegerType>(Val->getType())) - return false; + return None; bool ValUsesConditionAndMayBeFoldable = false; if (Condition != Val) { // Check if Val has Condition as an operand. 
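// Illustrative sketch, not part of the patch above. matchICmpOperand and
// getValueFromICmpCondition derive the set of values Val may take on an edge
// guarded by a comparison such as "(Val + Offset) <pred> C": build the
// allowed region for the compared expression, then shift it back by the
// offset. Standalone example with made-up constants.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static ConstantRange rangeFromOffsetCompare() {
  // Suppose the edge is taken only when (Val + 10) u< 100.
  ConstantRange RHS(APInt(32, 100)); // the single value 100
  ConstantRange ValPlus10 =
      ConstantRange::makeAllowedICmpRegion(ICmpInst::ICMP_ULT, RHS); // [0, 100)
  // Undo the "+ 10" to obtain the (wrapped) range of Val itself.
  return ValPlus10.subtract(APInt(32, 10)); // [-10, 90)
}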
@@ -1463,7 +1373,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, ValUsesConditionAndMayBeFoldable = isOperationFoldable(Usr) && usesOperand(Usr, Condition); if (!ValUsesConditionAndMayBeFoldable) - return false; + return None; } assert((Condition == Val || ValUsesConditionAndMayBeFoldable) && "Condition != Val nor Val doesn't use Condition"); @@ -1481,7 +1391,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, ValueLatticeElement EdgeLatticeVal = constantFoldUser(Usr, Condition, CaseValue, DL); if (EdgeLatticeVal.isOverdefined()) - return false; + return None; EdgeVal = EdgeLatticeVal.getConstantRange(); } if (DefaultCase) { @@ -1496,46 +1406,31 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, } else if (Case.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } - Result = ValueLatticeElement::getRange(std::move(EdgesVals)); - return true; + return ValueLatticeElement::getRange(std::move(EdgesVals)); } - return false; + return None; } /// Compute the value of Val on the edge BBFrom -> BBTo or the value at /// the basic block if the edge does not constrain Val. -bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, - ValueLatticeElement &Result, - Instruction *CxtI) { +Optional<ValueLatticeElement> LazyValueInfoImpl::getEdgeValue( + Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, Instruction *CxtI) { // If already a constant, there is nothing to compute. - if (Constant *VC = dyn_cast<Constant>(Val)) { - Result = ValueLatticeElement::get(VC); - return true; - } - - ValueLatticeElement LocalResult; - if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult)) - // If we couldn't constrain the value on the edge, LocalResult doesn't - // provide any information. - LocalResult = ValueLatticeElement::getOverdefined(); + if (Constant *VC = dyn_cast<Constant>(Val)) + return ValueLatticeElement::get(VC); - if (hasSingleValue(LocalResult)) { + ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo) + .getValueOr(ValueLatticeElement::getOverdefined()); + if (hasSingleValue(LocalResult)) // Can't get any more precise here - Result = LocalResult; - return true; - } + return LocalResult; - if (!hasBlockValue(Val, BBFrom)) { - if (pushBlockValue(std::make_pair(BBFrom, Val))) - return false; - // No new information. - Result = LocalResult; - return true; - } + Optional<ValueLatticeElement> OptInBlock = getBlockValue(Val, BBFrom); + if (!OptInBlock) + return None; + ValueLatticeElement &InBlock = *OptInBlock; // Try to intersect ranges of the BB and the constraint on the edge. - ValueLatticeElement InBlock = getBlockValue(Val, BBFrom); intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction @@ -1548,8 +1443,7 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, // but then the result is not cached. 
intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI); - Result = intersect(LocalResult, InBlock); - return true; + return intersect(LocalResult, InBlock); } ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, @@ -1558,11 +1452,13 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, << BB->getName() << "'\n"); assert(BlockValueStack.empty() && BlockValueSet.empty()); - if (!hasBlockValue(V, BB)) { - pushBlockValue(std::make_pair(BB, V)); + Optional<ValueLatticeElement> OptResult = getBlockValue(V, BB); + if (!OptResult) { solve(); + OptResult = getBlockValue(V, BB); + assert(OptResult && "Value not available after solving"); } - ValueLatticeElement Result = getBlockValue(V, BB); + ValueLatticeElement Result = *OptResult; intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); @@ -1592,16 +1488,15 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); - ValueLatticeElement Result; - if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { + Optional<ValueLatticeElement> Result = getEdgeValue(V, FromBB, ToBB, CxtI); + if (!Result) { solve(); - bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI); - (void)WasFastQuery; - assert(WasFastQuery && "More work to do after problem solved?"); + Result = getEdgeValue(V, FromBB, ToBB, CxtI); + assert(Result && "More work to do after problem solved?"); } - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - return Result; + LLVM_DEBUG(dbgs() << " Result = " << *Result << "\n"); + return *Result; } void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, @@ -1615,26 +1510,23 @@ void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, /// This lazily constructs the LazyValueInfoImpl. static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC, - const DataLayout *DL, - DominatorTree *DT = nullptr) { + const Module *M) { if (!PImpl) { - assert(DL && "getCache() called with a null DataLayout"); - PImpl = new LazyValueInfoImpl(AC, *DL, DT); + assert(M && "getCache() called with a null Module"); + const DataLayout &DL = M->getDataLayout(); + Function *GuardDecl = M->getFunction( + Intrinsic::getName(Intrinsic::experimental_guard)); + PImpl = new LazyValueInfoImpl(AC, DL, GuardDecl); } return *static_cast<LazyValueInfoImpl*>(PImpl); } bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { Info.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - const DataLayout &DL = F.getParent()->getDataLayout(); - - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - Info.DT = DTWP ? &DTWP->getDomTree() : nullptr; Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); if (Info.PImpl) - getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); + getImpl(Info.PImpl, Info.AC, F.getParent()).clear(); // Fully lazy. return false; @@ -1663,8 +1555,7 @@ bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA, // We need to invalidate if we have either failed to preserve this analyses // result directly or if any of its dependencies have been invalidated. 
auto PAC = PA.getChecker<LazyValueAnalysis>(); - if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || - (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA))) + if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>())) return true; return false; @@ -1676,9 +1567,8 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { auto &AC = FAM.getResult<AssumptionAnalysis>(F); auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI, DT); + return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI); } /// Returns true if we can statically tell that this value will never be a @@ -1701,9 +1591,8 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, if (isKnownNonConstant(V)) return nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); ValueLatticeElement Result = - getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); + getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1716,16 +1605,16 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, } ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, - Instruction *CxtI) { + Instruction *CxtI, + bool UndefAllowed) { assert(V->getType()->isIntegerTy()); unsigned Width = V->getType()->getIntegerBitWidth(); - const DataLayout &DL = BB->getModule()->getDataLayout(); ValueLatticeElement Result = - getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); - if (Result.isUndefined()) + getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI); + if (Result.isUnknown()) return ConstantRange::getEmpty(Width); - if (Result.isConstantRange()) - return Result.getConstantRange(); + if (Result.isConstantRange(UndefAllowed)) + return Result.getConstantRange(UndefAllowed); // We represent ConstantInt constants as constant ranges but other kinds // of integer constants, i.e. 
ConstantExpr will be tagged as constants assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) && @@ -1738,9 +1627,9 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { - const DataLayout &DL = FromBB->getModule()->getDataLayout(); + Module *M = FromBB->getModule(); ValueLatticeElement Result = - getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1757,11 +1646,11 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, BasicBlock *ToBB, Instruction *CxtI) { unsigned Width = V->getType()->getIntegerBitWidth(); - const DataLayout &DL = FromBB->getModule()->getDataLayout(); + Module *M = FromBB->getModule(); ValueLatticeElement Result = - getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI); - if (Result.isUndefined()) + if (Result.isUnknown()) return ConstantRange::getEmpty(Width); if (Result.isConstantRange()) return Result.getConstantRange(); @@ -1843,11 +1732,11 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { - const DataLayout &DL = FromBB->getModule()->getDataLayout(); + Module *M = FromBB->getModule(); ValueLatticeElement Result = - getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI); - return getPredicateResult(Pred, C, Result, DL, TLI); + return getPredicateResult(Pred, C, Result, M->getDataLayout(), TLI); } LazyValueInfo::Tristate @@ -1857,7 +1746,8 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, // isKnownNonZero can tell us the result of the predicate, we can // return it quickly. But this is only a fastpath, and falling // through would still be correct. 
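// Illustrative sketch, not part of the patch above. With the new UndefAllowed
// parameter a caller states explicitly whether a range that may include undef
// is acceptable. valueFitsInByte is a hypothetical caller written only to
// show the call shape.
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

static bool valueFitsInByte(LazyValueInfo &LVI, Value *V, BasicBlock *BB,
                            Instruction *CxtI) {
  ConstantRange CR =
      LVI.getConstantRange(V, BB, CxtI, /*UndefAllowed=*/false);
  return CR.getUnsignedMax().ule(255);
}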
- const DataLayout &DL = CxtI->getModule()->getDataLayout(); + Module *M = CxtI->getModule(); + const DataLayout &DL = M->getDataLayout(); if (V->getType()->isPointerTy() && C->isNullValue() && isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) { if (Pred == ICmpInst::ICMP_EQ) @@ -1865,7 +1755,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, else if (Pred == ICmpInst::ICMP_NE) return LazyValueInfo::True; } - ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI); + ValueLatticeElement Result = getImpl(PImpl, AC, M).getValueAt(V, CxtI); Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); if (Ret != Unknown) return Ret; @@ -1954,35 +1844,24 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { if (PImpl) { - const DataLayout &DL = PredBB->getModule()->getDataLayout(); - getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + getImpl(PImpl, AC, PredBB->getModule()) + .threadEdge(PredBB, OldSucc, NewSucc); } } void LazyValueInfo::eraseBlock(BasicBlock *BB) { if (PImpl) { - const DataLayout &DL = BB->getModule()->getDataLayout(); - getImpl(PImpl, AC, &DL, DT).eraseBlock(BB); + getImpl(PImpl, AC, BB->getModule()).eraseBlock(BB); } } void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { if (PImpl) { - getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS); + getImpl(PImpl, AC, F.getParent()).printLVI(F, DTree, OS); } } -void LazyValueInfo::disableDT() { - if (PImpl) - getImpl(PImpl, AC, DL, DT).disableDT(); -} - -void LazyValueInfo::enableDT() { - if (PImpl) - getImpl(PImpl, AC, DL, DT).enableDT(); -} - // Print the LVI for the function arguments at the start of each basic block. void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( const BasicBlock *BB, formatted_raw_ostream &OS) { @@ -1991,7 +1870,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( for (auto &Arg : F->args()) { ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); - if (Result.isUndefined()) + if (Result.isUnknown()) continue; OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n"; } diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 0f274429f11fd..10ead10192060 100644 --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -301,14 +301,13 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() { void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTreeWrapperPass>(); - if (UseGPUDA) - AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesAll(); } bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis( - const Function &F) const { - if (!UseGPUDA) + const Function &F, const TargetTransformInfo &TTI) const { + if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis())) return false; // GPUDivergenceAnalysis requires a reducible CFG. 
@@ -337,7 +336,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - if (shouldUseGPUDivergenceAnalysis(F)) { + if (shouldUseGPUDivergenceAnalysis(F, TTI)) { // run the new GPU divergence analysis auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index ba945eb4318fc..564c00dbad983 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -49,7 +49,6 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -93,15 +92,12 @@ namespace { void visitFunction(Function &F); - void visitCallSite(CallSite CS); - void visitMemoryReference(Instruction &I, Value *Ptr, - uint64_t Size, unsigned Align, - Type *Ty, unsigned Flags); + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Alignment, Type *Ty, unsigned Flags); void visitEHBeginCatch(IntrinsicInst *II); void visitEHEndCatch(IntrinsicInst *II); - void visitCallInst(CallInst &I); - void visitInvokeInst(InvokeInst &I); void visitReturnInst(ReturnInst &I); void visitLoadInst(LoadInst &I); void visitStoreInst(StoreInst &I); @@ -222,21 +218,20 @@ void Lint::visitFunction(Function &F) { // TODO: Check for irreducible control flow. } -void Lint::visitCallSite(CallSite CS) { - Instruction &I = *CS.getInstruction(); - Value *Callee = CS.getCalledValue(); +void Lint::visitCallBase(CallBase &I) { + Value *Callee = I.getCalledOperand(); - visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, + visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, None, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { - Assert(CS.getCallingConv() == F->getCallingConv(), + Assert(I.getCallingConv() == F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", &I); FunctionType *FT = F->getFunctionType(); - unsigned NumActualArgs = CS.arg_size(); + unsigned NumActualArgs = I.arg_size(); Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs : FT->getNumParams() == NumActualArgs, @@ -252,7 +247,7 @@ void Lint::visitCallSite(CallSite CS) { // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + auto AI = I.arg_begin(), AE = I.arg_end(); for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { @@ -266,16 +261,15 @@ void Lint::visitCallSite(CallSite CS) { // not fully precise because we don't know the sizes of the dereferenced // memory regions. if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { - AttributeList PAL = CS.getAttributes(); + AttributeList PAL = I.getAttributes(); unsigned ArgNo = 0; - for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; - ++BI, ++ArgNo) { + for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { // Skip ByVal arguments since they will be memcpy'd to the callee's // stack so we're not really passing the pointer anyway. 
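// Illustrative sketch, not part of the patch above. Replacing visitCallSite
// with visitCallBase works because InstVisitor routes CallInst and InvokeInst
// to visitCallBase when no more specific handler is provided, which is what
// lets the visitCallInst/visitInvokeInst forwarders be removed. CallCounter
// is invented for the example.
#include "llvm/IR/InstVisitor.h"

namespace lint_sketch {
struct CallCounter : public llvm::InstVisitor<CallCounter> {
  unsigned NumCallSites = 0;
  // Invoked for calls and invokes alike.
  void visitCallBase(llvm::CallBase &CB) { ++NumCallSites; }
};
// Usage: CallCounter C; C.visit(F); where F is a Function.
} // namespace lint_sketch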
if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal)) continue; // If both arguments are readonly, they have no dependence. - if (Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo)) + if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) continue; if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); @@ -290,19 +284,18 @@ void Lint::visitCallSite(CallSite CS) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), - DL->getABITypeAlignment(Ty), Ty, + DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); } } } } - if (CS.isCall()) { - const CallInst *CI = cast<CallInst>(CS.getInstruction()); + if (const auto *CI = dyn_cast<CallInst>(&I)) { if (CI->isTailCall()) { const AttributeList &PAL = CI->getAttributes(); unsigned ArgNo = 0; - for (Value *Arg : CS.args()) { + for (Value *Arg : I.args()) { // Skip ByVal arguments since they will be memcpy'd to the callee's // stack anyway. if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) @@ -327,9 +320,9 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, - MCI->getDestAlignment(), nullptr, MemRef::Write); + MCI->getDestAlign(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, - MCI->getSourceAlignment(), nullptr, MemRef::Read); + MCI->getSourceAlign(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial @@ -345,20 +338,36 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: memcpy source and destination overlap", &I); break; } + case Intrinsic::memcpy_inline: { + MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); + const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); + visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlign(), + nullptr, MemRef::Write); + visitMemoryReference(I, MCII->getSource(), Size, MCII->getSourceAlign(), + nullptr, MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. + const LocationSize LS = LocationSize::precise(Size); + Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, - MMI->getDestAlignment(), nullptr, MemRef::Write); + MMI->getDestAlign(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, - MMI->getSourceAlignment(), nullptr, MemRef::Read); + MMI->getSourceAlign(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, - MSI->getDestAlignment(), nullptr, MemRef::Write); + MSI->getDestAlign(), nullptr, MemRef::Write); break; } @@ -367,38 +376,30 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: va_start called in a non-varargs function", &I); - visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, + None, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: - visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, - nullptr, MemRef::Write); - visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0, - nullptr, MemRef::Read); + visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, + None, nullptr, MemRef::Write); + visitMemoryReference(I, I.getArgOperand(1), MemoryLocation::UnknownSize, + None, nullptr, MemRef::Read); break; case Intrinsic::vaend: - visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, + None, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. - visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, + None, nullptr, MemRef::Read | MemRef::Write); break; } } -void Lint::visitCallInst(CallInst &I) { - return visitCallSite(&I); -} - -void Lint::visitInvokeInst(InvokeInst &I) { - return visitCallSite(&I); -} - void Lint::visitReturnInst(ReturnInst &I) { Function *F = I.getParent()->getParent(); Assert(!F->doesNotReturn(), @@ -412,9 +413,8 @@ void Lint::visitReturnInst(ReturnInst &I) { // TODO: Check that the reference is in bounds. // TODO: Check readnone/readonly function attributes. -void Lint::visitMemoryReference(Instruction &I, - Value *Ptr, uint64_t Size, unsigned Align, - Type *Ty, unsigned Flags) { +void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Align, Type *Ty, unsigned Flags) { // If no memory is being referenced, it doesn't matter if the pointer // is valid. if (Size == 0) @@ -465,15 +465,13 @@ void Lint::visitMemoryReference(Instruction &I, // something we can handle and if so extract the size of this base object // along with its alignment. uint64_t BaseSize = MemoryLocation::UnknownSize; - unsigned BaseAlign = 0; + MaybeAlign BaseAlign; if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { Type *ATy = AI->getAllocatedType(); if (!AI->isArrayAllocation() && ATy->isSized()) BaseSize = DL->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL->getABITypeAlignment(ATy); + BaseAlign = AI->getAlign(); } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. 
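// Illustrative sketch, not part of the patch above. The alignment check in
// visitMemoryReference now compares the access alignment against
// commonAlignment(BaseAlign, Offset), i.e. the alignment still guaranteed
// after stepping Offset bytes into the base object. accessAlignmentFits is
// invented for the example.
#include "llvm/Support/Alignment.h"
#include <cstdint>

using namespace llvm;

static bool accessAlignmentFits(MaybeAlign BaseAlign, uint64_t Offset,
                                MaybeAlign AccessAlign) {
  if (!BaseAlign || !AccessAlign)
    return true; // nothing is known, so nothing can be proven wrong
  // e.g. a 16-byte aligned base accessed at offset 4 only guarantees 4 bytes.
  return *AccessAlign <= commonAlignment(*BaseAlign, Offset);
}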
@@ -481,9 +479,9 @@ void Lint::visitMemoryReference(Instruction &I, Type *GTy = GV->getValueType(); if (GTy->isSized()) BaseSize = DL->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL->getABITypeAlignment(GTy); + BaseAlign = GV->getAlign(); + if (!BaseAlign && GTy->isSized()) + BaseAlign = DL->getABITypeAlign(GTy); } } @@ -496,24 +494,24 @@ void Lint::visitMemoryReference(Instruction &I, // Accesses that say that the memory is more aligned than it is are not // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = DL->getABITypeAlignment(Ty); - Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); + if (!Align && Ty && Ty->isSized()) + Align = DL->getABITypeAlign(Ty); + if (BaseAlign && Align) + Assert(*Align <= commonAlignment(*BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } void Lint::visitLoadInst(LoadInst &I) { visitMemoryReference(I, I.getPointerOperand(), - DL->getTypeStoreSize(I.getType()), I.getAlignment(), + DL->getTypeStoreSize(I.getType()), I.getAlign(), I.getType(), MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), DL->getTypeStoreSize(I.getOperand(0)->getType()), - I.getAlignment(), - I.getOperand(0)->getType(), MemRef::Write); + I.getAlign(), I.getOperand(0)->getType(), MemRef::Write); } void Lint::visitXor(BinaryOperator &I) { @@ -612,12 +610,12 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0, + visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, None, nullptr, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0, + visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, None, nullptr, MemRef::Branchee); Assert(I.getNumDestinations() != 0, @@ -689,8 +687,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, } } else if (PHINode *PN = dyn_cast<PHINode>(V)) { if (Value *W = PN->hasConstantValue()) - if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); @@ -719,9 +716,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) return findValueImpl(W, OffsetOk, Visited); } else if (auto *C = dyn_cast<Constant>(V)) { - if (Value *W = ConstantFoldConstant(C, *DL, TLI)) - if (W && W != V) - return findValueImpl(W, OffsetOk, Visited); + Value *W = ConstantFoldConstant(C, *DL, TLI); + if (W != V) + return findValueImpl(W, OffsetOk, Visited); } return V; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index a7d07c0b61834..e5245225d905d 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -27,24 +27,12 @@ using namespace llvm; -static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) { - if (const MaybeAlign PA = Base->getPointerAlignment(DL)) - return *PA; - Type *const Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return None; - return Align(DL.getABITypeAlignment(Ty)); -} - static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, const 
DataLayout &DL) { - if (MaybeAlign BA = getBaseAlign(Base, DL)) { - const APInt APBaseAlign(Offset.getBitWidth(), BA->value()); - const APInt APAlign(Offset.getBitWidth(), Alignment.value()); - assert(APAlign.isPowerOf2() && "must be a power of 2!"); - return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1)); - } - return false; + Align BA = Base->getPointerAlignment(DL); + const APInt APAlign(Offset.getBitWidth(), Alignment.value()); + assert(APAlign.isPowerOf2() && "must be a power of 2!"); + return BA >= Alignment && !(Offset & (APAlign - 1)); } /// Test if V is always a pointer to allocated and suitably aligned memory for @@ -52,7 +40,13 @@ static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, static bool isDereferenceableAndAlignedPointer( const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, - SmallPtrSetImpl<const Value *> &Visited) { + SmallPtrSetImpl<const Value *> &Visited, unsigned MaxDepth) { + assert(V->getType()->isPointerTy() && "Base must be pointer"); + + // Recursion limit. + if (MaxDepth-- == 0) + return false; + // Already visited? Bail out, we've likely hit unreachable code. if (!Visited.insert(V).second) return false; @@ -61,9 +55,11 @@ static bool isDereferenceableAndAlignedPointer( // malloc may return null. // bitcast instructions are no-ops as far as dereferenceability is concerned. - if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, - Size, DL, CtxI, DT, Visited); + if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) { + if (BC->getSrcTy()->isPointerTy()) + return isDereferenceableAndAlignedPointer( + BC->getOperand(0), Alignment, Size, DL, CtxI, DT, Visited, MaxDepth); + } bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), @@ -72,7 +68,7 @@ static bool isDereferenceableAndAlignedPointer( if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) { // As we recursed through GEPs to get here, we've incrementally checked // that each step advanced by a multiple of the alignment. If our base is - // properly aligned, then the original offset accessed must also be. + // properly aligned, then the original offset accessed must also be. Type *Ty = V->getType(); assert(Ty->isSized() && "must be sized"); APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); @@ -99,22 +95,22 @@ static bool isDereferenceableAndAlignedPointer( // addrspacecast, so we can't do arithmetic directly on the APInt values. return isDereferenceableAndAlignedPointer( Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, - CtxI, DT, Visited); + CtxI, DT, Visited, MaxDepth); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited, MaxDepth); if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, - Size, DL, CtxI, DT, Visited); + Size, DL, CtxI, DT, Visited, MaxDepth); if (const auto *Call = dyn_cast<CallBase>(V)) if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, - DT, Visited); + DT, Visited, MaxDepth); // If we don't know, assume the worst. 
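// Illustrative sketch, not part of the patch above. The simplified isAligned()
// reduces to two facts: the base pointer must be at least as aligned as the
// access, and the accumulated byte offset must be a multiple of the access
// alignment (tested with a power-of-two mask, exactly as the APInt version
// does). Plain-integer restatement, with a nonzero power-of-two Alignment
// assumed.
#include <cstdint>

static bool isAlignedAccess(uint64_t BaseAlign, uint64_t Offset,
                            uint64_t Alignment) {
  // For a power of two, Offset % Alignment == (Offset & (Alignment - 1)).
  return BaseAlign >= Alignment && (Offset & (Alignment - 1)) == 0;
}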
return false; @@ -128,11 +124,11 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, // Note: At the moment, Size can be zero. This ends up being interpreted as // a query of whether [Base, V] is dereferenceable and V is aligned (since // that's what the implementation happened to do). It's unclear if this is - // the desired semantic, but at least SelectionDAG does exercise this case. - + // the desired semantic, but at least SelectionDAG does exercise this case. + SmallPtrSet<const Value *, 32> Visited; return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, - Visited); + Visited, 16); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, @@ -140,9 +136,11 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - if (!Ty->isSized()) + // For unsized types or scalable vectors we don't know exactly how many bytes + // are dereferenced, so bail out. + if (!Ty->isSized() || isa<ScalableVectorType>(Ty)) return false; - + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that @@ -160,7 +158,7 @@ bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align(1), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -202,8 +200,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), DL.getTypeStoreSize(LI->getType())); - const Align Alignment = DL.getValueOrABITypeAlignment( - MaybeAlign(LI->getAlignment()), LI->getType()); + const Align Alignment = LI->getAlign(); Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); @@ -259,14 +256,10 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. -bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { - // Zero alignment means that the load has the ABI alignment for the target - const Align Alignment = - DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); - // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) @@ -301,7 +294,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, return false; Value *AccessedPtr; - MaybeAlign MaybeAccessedAlign; + Type *AccessedTy; + Align AccessedAlign; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { // Ignore volatile loads. 
The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -309,20 +303,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); + AccessedTy = LI->getType(); + AccessedAlign = LI->getAlign(); } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); + AccessedTy = SI->getValueOperand()->getType(); + AccessedAlign = SI->getAlign(); } else continue; - Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - - const Align AccessedAlign = - DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); if (AccessedAlign < Alignment) continue; @@ -338,7 +330,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { @@ -362,7 +354,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, bool *IsLoad, + AAResults *AA, bool *IsLoad, unsigned *NumScanedInst) { // Don't CSE load that is volatile or anything stronger than unordered. if (!Load->isUnordered()) @@ -373,11 +365,33 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst); } +// Check if the load and the store have the same base, constant offsets and +// non-overlapping access ranges. +static bool AreNonOverlapSameBaseLoadAndStore( + Value *LoadPtr, Type *LoadTy, Value *StorePtr, Type *StoreTy, + const DataLayout &DL) { + APInt LoadOffset(DL.getTypeSizeInBits(LoadPtr->getType()), 0); + APInt StoreOffset(DL.getTypeSizeInBits(StorePtr->getType()), 0); + Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets( + DL, LoadOffset, /* AllowNonInbounds */ false); + Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets( + DL, StoreOffset, /* AllowNonInbounds */ false); + if (LoadBase != StoreBase) + return false; + auto LoadAccessSize = LocationSize::precise(DL.getTypeStoreSize(LoadTy)); + auto StoreAccessSize = LocationSize::precise(DL.getTypeStoreSize(StoreTy)); + ConstantRange LoadRange(LoadOffset, + LoadOffset + LoadAccessSize.toRaw()); + ConstantRange StoreRange(StoreOffset, + StoreOffset + StoreAccessSize.toRaw()); + return LoadRange.intersectWith(StoreRange).isEmptySet(); +} + Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, bool *IsLoadCSE, + AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; @@ -451,10 +465,21 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, StrippedPtr != StorePtr) continue; - // If we have alias analysis and it says the store won't modify the loaded - // value, ignore the store. 
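// Illustrative sketch, not part of the patch above. The new
// AreNonOverlapSameBaseLoadAndStore helper reduces both accesses to
// (same base, constant offset, size) and then lets ConstantRange decide
// whether the two byte ranges can overlap. Standalone restatement with raw
// offsets and sizes (assumed nonzero); accessesAreDisjoint is invented for
// the example.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

static bool accessesAreDisjoint(uint64_t LoadOff, uint64_t LoadSize,
                                uint64_t StoreOff, uint64_t StoreSize) {
  const unsigned BW = 64;
  ConstantRange LoadRange(APInt(BW, LoadOff), APInt(BW, LoadOff + LoadSize));
  ConstantRange StoreRange(APInt(BW, StoreOff),
                           APInt(BW, StoreOff + StoreSize));
  // Empty intersection means the store cannot clobber the loaded bytes.
  return LoadRange.intersectWith(StoreRange).isEmptySet();
}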
- if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) - continue; + if (!AA) { + // When AA isn't available, but if the load and the store have the same + // base, constant offsets and non-overlapping access ranges, ignore the + // store. This is a simple form of alias analysis that is used by the + // inliner. FIXME: use BasicAA if possible. + if (AreNonOverlapSameBaseLoadAndStore( + Ptr, AccessTy, SI->getPointerOperand(), + SI->getValueOperand()->getType(), DL)) + continue; + } else { + // If we have alias analysis and it says the store won't modify the + // loaded value, ignore the store. + if (!isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) + continue; + } // Otherwise the store that may or may not alias the pointer, bail out. ++ScanFrom; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 26fa5112c29a7..ae282a7a10952 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -43,7 +42,6 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -174,6 +172,13 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, return OrigSCEV; } +RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( + unsigned Index, RuntimePointerChecking &RtCheck) + : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End), + Low(RtCheck.Pointers[Index].Start) { + Members.push_back(Index); +} + /// Calculate Start and End points of memory access. /// Let's assume A is the first access and B is a memory access on N-th loop /// iteration. 
Then B is calculated as: @@ -231,14 +236,14 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); } -SmallVector<RuntimePointerChecking::PointerCheck, 4> +SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() const { - SmallVector<PointerCheck, 4> Checks; + SmallVector<RuntimePointerCheck, 4> Checks; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { - const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; - const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; + const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; if (needsChecking(CGI, CGJ)) Checks.push_back(std::make_pair(&CGI, &CGJ)); @@ -254,8 +259,8 @@ void RuntimePointerChecking::generateChecks( Checks = generateChecks(); } -bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, - const CheckingPtrGroup &N) const { +bool RuntimePointerChecking::needsChecking( + const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) if (needsChecking(M.Members[I], N.Members[J])) @@ -277,7 +282,7 @@ static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, return I; } -bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { +bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) { const SCEV *Start = RtCheck.Pointers[Index].Start; const SCEV *End = RtCheck.Pointers[Index].End; @@ -352,7 +357,7 @@ void RuntimePointerChecking::groupChecks( // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) - CheckingGroups.push_back(CheckingPtrGroup(I, *this)); + CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this)); return; } @@ -378,7 +383,7 @@ void RuntimePointerChecking::groupChecks( MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, Pointers[I].IsWritePtr); - SmallVector<CheckingPtrGroup, 2> Groups; + SmallVector<RuntimeCheckingPtrGroup, 2> Groups; auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); // Because DepCands is constructed by visiting accesses in the order in @@ -395,7 +400,7 @@ void RuntimePointerChecking::groupChecks( // Go through all the existing sets and see if we can find one // which can include this pointer. - for (CheckingPtrGroup &Group : Groups) { + for (RuntimeCheckingPtrGroup &Group : Groups) { // Don't perform more than a certain amount of comparisons. // This should limit the cost of grouping the pointers to something // reasonable. If we do end up hitting this threshold, the algorithm @@ -415,7 +420,7 @@ void RuntimePointerChecking::groupChecks( // We couldn't add this pointer to any existing set or the threshold // for the number of comparisons has been reached. Create a new group // to hold the current pointer. - Groups.push_back(CheckingPtrGroup(Pointer, *this)); + Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); } // We've computed the grouped checks for this partition. 
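As an aside, the grouping machinery renamed above (RuntimeCheckingPtrGroup and the pairwise check generation in RuntimePointerChecking::generateChecks) can be illustrated with a minimal stand-alone sketch. The struct, field names, and the simplified conflict predicate below are hypothetical stand-ins, not the LLVM types themselves.

#include <utility>
#include <vector>

// Hypothetical, simplified stand-in for RuntimeCheckingPtrGroup: a set of
// pointer indices that share one alias set and one dependence set.
struct RuntimePtrGroupSketch {
  std::vector<unsigned> Members;
  unsigned AliasSetId = 0;
  unsigned DepSetId = 0;
};

// Simplified version of the needsChecking predicate: two groups may conflict
// only if they belong to the same alias set but to different dependence sets.
static bool mayConflict(const RuntimePtrGroupSketch &A,
                        const RuntimePtrGroupSketch &B) {
  return A.AliasSetId == B.AliasSetId && A.DepSetId != B.DepSetId;
}

// Mirror of the double loop in generateChecks(): emit one runtime check per
// unordered pair of groups that may conflict.
static std::vector<std::pair<unsigned, unsigned>>
generateChecksSketch(const std::vector<RuntimePtrGroupSketch> &Groups) {
  std::vector<std::pair<unsigned, unsigned>> Checks;
  for (unsigned I = 0; I < Groups.size(); ++I)
    for (unsigned J = I + 1; J < Groups.size(); ++J)
      if (mayConflict(Groups[I], Groups[J]))
        Checks.push_back({I, J});
  return Checks;
}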
@@ -451,7 +456,7 @@ bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { } void RuntimePointerChecking::printChecks( - raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks, unsigned Depth) const { unsigned N = 0; for (const auto &Check : Checks) { @@ -500,7 +505,7 @@ public: typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; - AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA, + AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), @@ -700,18 +705,19 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // to place a runtime bound check. bool CanDoRT = true; - bool NeedRTCheck = false; + bool MayNeedRTCheck = false; if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. - unsigned ASId = 1; + unsigned ASId = 0; for (auto &AS : AST) { int NumReadPtrChecks = 0; int NumWritePtrChecks = 0; bool CanDoAliasSetRT = true; + ++ASId; // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. @@ -742,14 +748,30 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // check them. But there is no need to checks if there is only one // dependence set for this alias set. // - // Note that this function computes CanDoRT and NeedRTCheck independently. - // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer - // for which we couldn't find the bounds but we don't actually need to emit - // any checks so it does not matter. + // Note that this function computes CanDoRT and MayNeedRTCheck + // independently. For example CanDoRT=false, MayNeedRTCheck=false means that + // we have a pointer for which we couldn't find the bounds but we don't + // actually need to emit any checks so it does not matter. bool NeedsAliasSetRTCheck = false; - if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) + if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) { NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); + // For alias sets without at least 2 writes or 1 write and 1 read, there + // is no need to generate RT checks and CanDoAliasSetRT for this alias set + // does not impact whether runtime checks can be generated. + if (!NeedsAliasSetRTCheck) { + assert((AS.size() <= 1 || + all_of(AS, + [this](auto AC) { + MemAccessInfo AccessWrite(AC.getValue(), true); + return DepCands.findValue(AccessWrite) == + DepCands.end(); + })) && + "Can only skip updating CanDoRT below, if all entries in AS " + "are reads or there is at most 1 entry"); + continue; + } + } // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. 
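For intuition, the read/write counting rule used above can be restated on its own. This is a hedged sketch that ignores the single-dependence-set early exit; aliasSetNeedsRuntimeChecks is a hypothetical helper, not LAA code.

// An alias set can only contain a conflict if it has at least two writes,
// or at least one write and one read; reads alone never conflict.
static bool aliasSetNeedsRuntimeChecks(int NumReadPtrChecks,
                                       int NumWritePtrChecks) {
  return NumWritePtrChecks >= 2 ||
         (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1);
}

// Hypothetical examples:
//   aliasSetNeedsRuntimeChecks(3, 0) == false  (only reads)
//   aliasSetNeedsRuntimeChecks(2, 1) == true   (reads against one write)
//   aliasSetNeedsRuntimeChecks(0, 2) == true   (two writes)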
@@ -768,7 +790,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } CanDoRT &= CanDoAliasSetRT; - NeedRTCheck |= NeedsAliasSetRTCheck; + MayNeedRTCheck |= NeedsAliasSetRTCheck; ++ASId; } @@ -802,15 +824,18 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } } - if (NeedRTCheck && CanDoRT) + if (MayNeedRTCheck && CanDoRT) RtCheck.generateChecks(DepCands, IsDepCheckNeeded); LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n"); - RtCheck.Need = NeedRTCheck; + // If we can do run-time checks, but there are no checks, no runtime checks + // are needed. This can happen when all pointers point to the same underlying + // object for example. + RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck; - bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; + bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; if (!CanDoRTIfNeeded) RtCheck.reset(); return CanDoRTIfNeeded; @@ -1787,7 +1812,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { return true; } -void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, +void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { typedef SmallPtrSet<Value*, 16> ValueSet; @@ -1810,6 +1835,10 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + const bool EnableMemAccessVersioningOfLoop = + EnableMemAccessVersioning && + !TheLoop->getHeader()->getParent()->hasOptSize(); + // For each block. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the BB and collect legal loads and stores. Also detect any @@ -1845,7 +1874,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, // If the function has an explicit vectorized counterpart, we can safely // assume that it can be vectorized. if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && - TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) + !VFDatabase::getMappings(*Call).empty()) continue; auto *Ld = dyn_cast<LoadInst>(&I); @@ -1865,7 +1894,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, NumLoads++; Loads.push_back(Ld); DepChecker->addAccess(Ld); - if (EnableMemAccessVersioning) + if (EnableMemAccessVersioningOfLoop) collectStridedAccess(Ld); continue; } @@ -1889,7 +1918,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, NumStores++; Stores.push_back(St); DepChecker->addAccess(St); - if (EnableMemAccessVersioning) + if (EnableMemAccessVersioningOfLoop) collectStridedAccess(St); } } // Next instr. @@ -2116,169 +2145,6 @@ bool LoopAccessInfo::isUniform(Value *V) const { return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); } -// FIXME: this function is currently a duplicate of the one in -// LoopVectorize.cpp. -static Instruction *getFirstInst(Instruction *FirstInst, Value *V, - Instruction *Loc) { - if (FirstInst) - return FirstInst; - if (Instruction *I = dyn_cast<Instruction>(V)) - return I->getParent() == Loc->getParent() ? I : nullptr; - return nullptr; -} - -namespace { - -/// IR Values for the lower and upper bounds of a pointer evolution. We -/// need to use value-handles because SCEV expansion can invalidate previously -/// expanded values. Thus expansion of a pointer can invalidate the bounds for -/// a previous one. 
-struct PointerBounds { - TrackingVH<Value> Start; - TrackingVH<Value> End; -}; - -} // end anonymous namespace - -/// Expand code for the lower and upper bound of the pointer group \p CG -/// in \p TheLoop. \return the values for the bounds. -static PointerBounds -expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, - Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, - const RuntimePointerChecking &PtrRtChecking) { - Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; - const SCEV *Sc = SE->getSCEV(Ptr); - - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - LLVMContext &Ctx = Loc->getContext(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" - << *Ptr << "\n"); - // Ptr could be in the loop body. If so, expand a new one at the correct - // location. - Instruction *Inst = dyn_cast<Instruction>(Ptr); - Value *NewPtr = (Inst && TheLoop->contains(Inst)) - ? Exp.expandCodeFor(Sc, PtrArithTy, Loc) - : Ptr; - // We must return a half-open range, which means incrementing Sc. - const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); - Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); - return {NewPtr, NewPtrPlusOne}; - } else { - Value *Start = nullptr, *End = nullptr; - LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); - LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High - << "\n"); - return {Start, End}; - } -} - -/// Turns a collection of checks into a collection of expanded upper and -/// lower bounds for both pointers in the check. -static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds( - const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks, - Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, - const RuntimePointerChecking &PtrRtChecking) { - SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; - - // Here we're relying on the SCEV Expander's cache to only emit code for the - // same bounds once. - transform( - PointerChecks, std::back_inserter(ChecksWithBounds), - [&](const RuntimePointerChecking::PointerCheck &Check) { - PointerBounds - First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), - Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); - return std::make_pair(First, Second); - }); - - return ChecksWithBounds; -} - -std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( - Instruction *Loc, - const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks) - const { - const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); - auto *SE = PSE->getSE(); - SCEVExpander Exp(*SE, DL, "induction"); - auto ExpandedChecks = - expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking); - - LLVMContext &Ctx = Loc->getContext(); - Instruction *FirstInst = nullptr; - IRBuilder<> ChkBuilder(Loc); - // Our instructions might fold to a constant. 
- Value *MemoryRuntimeCheck = nullptr; - - for (const auto &Check : ExpandedChecks) { - const PointerBounds &A = Check.first, &B = Check.second; - // Check if two pointers (A and B) conflict where conflict is computed as: - // start(A) <= end(B) && start(B) <= end(A) - unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); - unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); - - assert((AS0 == B.End->getType()->getPointerAddressSpace()) && - (AS1 == A.End->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); - - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - - Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - - // [A|B].Start points to the first accessed byte under base [A|B]. - // [A|B].End points to the last accessed byte, plus one. - // There is no conflict when the intervals are disjoint: - // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) - // - // bound0 = (B.Start < A.End) - // bound1 = (A.Start < B.End) - // IsConflict = bound0 & bound1 - Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = - ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; - } - - if (!MemoryRuntimeCheck) - return std::make_pair(nullptr, nullptr); - - // We have to do this trickery because the IRBuilder might fold the check to a - // constant expression in which case there is no Instruction anchored in a - // the block. 
- Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck, - ConstantInt::getTrue(Ctx)); - ChkBuilder.Insert(Check, "memcheck.conflict"); - FirstInst = getFirstInst(FirstInst, Check, Loc); - return std::make_pair(FirstInst, Check); -} - -std::pair<Instruction *, Instruction *> -LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { - if (!PtrRtChecking->Need) - return std::make_pair(nullptr, nullptr); - - return addRuntimeChecks(Loc, PtrRtChecking->getChecks()); -} - void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { Value *Ptr = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess)) @@ -2343,7 +2209,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { } LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, - const TargetLibraryInfo *TLI, AliasAnalysis *AA, + const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)), diff --git a/llvm/lib/Analysis/LoopAnalysisManager.cpp b/llvm/lib/Analysis/LoopAnalysisManager.cpp index 02d40fb8d72af..21017c04da99f 100644 --- a/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManagerImpl.h" using namespace llvm; diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 25325ec1be025..6ba247a87c226 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/BreadthFirstIterator.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -64,10 +65,10 @@ static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { return LastLoop; } - return (std::is_sorted(Loops.begin(), Loops.end(), - [](const Loop *L1, const Loop *L2) { - return L1->getLoopDepth() < L2->getLoopDepth(); - })) + return (llvm::is_sorted(Loops, + [](const Loop *L1, const Loop *L2) { + return L1->getLoopDepth() < L2->getLoopDepth(); + })) ? LastLoop : nullptr; } @@ -90,7 +91,11 @@ static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) return false; - return AR->getStepRecurrence(SE) == &ElemSize; + const SCEV *StepRec = AR->getStepRecurrence(SE); + if (StepRec && SE.isKnownNegative(StepRec)) + StepRec = SE.getNegativeSCEV(StepRec); + + return StepRec == &ElemSize; } /// Compute the trip count for the given loop \p L. 
Return the SCEV expression @@ -285,10 +290,13 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); - Stride = SE.getNoopOrSignExtend(Stride, WiderType); + if (SE.isKnownNegative(Stride)) + Stride = SE.getNegativeSCEV(Stride); + Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" << *RefCost << "\n"); @@ -349,6 +357,19 @@ bool IndexedReference::delinearize(const LoopInfo &LI) { return false; } + // The array may be accessed in reverse, for example: + // for (i = N; i > 0; i--) + // A[i] = 0; + // In this case, reconstruct the access function using the absolute value + // of the step recurrence. + const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn); + const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr; + + if (StepRec && SE.isKnownNegative(StepRec)) + AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(), + SE.getNegativeSCEV(StepRec), + AccessFnAR->getLoop(), + AccessFnAR->getNoWrapFlags()); const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); Subscripts.push_back(Div); Sizes.push_back(ElemSize); @@ -396,6 +417,7 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); } @@ -537,6 +559,18 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { dbgs().indent(2) << Representative << "\n"; }); + + // FIXME: Both positive and negative access functions will be placed + // into the same reference group, resulting in a bi-directional array + // access such as: + // for (i = N; i > 0; i--) + // A[i] = A[N - i]; + // having the same cost calculation as a single dimension access pattern + // for (i = 0; i < N; i++) + // A[i] = A[i]; + // when in actuality, depending on the array size, the first example + // should have a cost closer to 2x the second due to the two cache + // accesses per iteration from opposite ends of the array Optional<bool> HasTemporalReuse = R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); Optional<bool> HasSpacialReuse = diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 3dc29b40834c7..b5af210f1b920 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -420,7 +420,7 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, - DominatorTree &DT) { + const DominatorTree &DT) { for (const Instruction &I : BB) { // Tokens can't be used in PHI nodes and live-out tokens prevent loop // optimizations, so for the purposes of considered LCSSA form, we @@ -446,14 +446,15 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, return true; } -bool Loop::isLCSSAForm(DominatorTree &DT) const { +bool Loop::isLCSSAForm(const DominatorTree &DT) const { // For each block 
we check that it doesn't have any uses outside of this loop. return all_of(this->blocks(), [&](const BasicBlock *BB) { return isBlockInLCSSAForm(*this, *BB, DT); }); } -bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const { +bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, + const LoopInfo &LI) const { // For each block we check that it doesn't have any uses outside of its // innermost loop. This process will transitively guarantee that the current // loop and all of the nested loops are in LCSSA form. @@ -480,8 +481,8 @@ bool Loop::isSafeToClone() const { return false; for (Instruction &I : *BB) - if (auto CS = CallSite(&I)) - if (CS.cannotDuplicate()) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (CB->cannotDuplicate()) return false; } return true; diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp new file mode 100644 index 0000000000000..61e53de93151a --- /dev/null +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -0,0 +1,296 @@ +//===- LoopNestAnalysis.cpp - Loop Nest Analysis --------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The implementation for the loop nest analysis. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" + +using namespace llvm; + +#define DEBUG_TYPE "loopnest" +#ifndef NDEBUG +static const char *VerboseDebug = DEBUG_TYPE "-verbose"; +#endif + +/// Determine whether the loops structure violates basic requirements for +/// perfect nesting: +/// - the inner loop should be the outer loop's only child +/// - the outer loop header should 'flow' into the inner loop preheader +/// or jump around the inner loop to the outer loop latch +/// - if the inner loop latch exits the inner loop, it should 'flow' into +/// the outer loop latch. +/// Returns true if the loop structure satisfies the basic requirements and +/// false otherwise. 
+static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE); + +//===----------------------------------------------------------------------===// +// LoopNest implementation +// + +LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE) + : MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) { + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); +} + +std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root, + ScalarEvolution &SE) { + return std::make_unique<LoopNest>(Root, SE); +} + +bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE) { + assert(!OuterLoop.getSubLoops().empty() && "Outer loop should have subloops"); + assert(InnerLoop.getParentLoop() && "Inner loop should have a parent"); + LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName() + << "' and '" << InnerLoop.getName() + << "' are perfectly nested.\n"); + + // Determine whether the loops structure satisfies the following requirements: + // - the inner loop should be the outer loop's only child + // - the outer loop header should 'flow' into the inner loop preheader + // or jump around the inner loop to the outer loop latch + // - if the inner loop latch exits the inner loop, it should 'flow' into + // the outer loop latch. + if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) { + LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n"); + return false; + } + + // Bail out if we cannot retrieve the outer loop bounds. + auto OuterLoopLB = OuterLoop.getBounds(SE); + if (OuterLoopLB == None) { + LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " + << OuterLoop << "\n";); + return false; + } + + // Identify the outer loop latch comparison instruction. + const BasicBlock *Latch = OuterLoop.getLoopLatch(); + assert(Latch && "Expecting a valid loop latch"); + const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); + assert(BI && BI->isConditional() && + "Expecting loop latch terminator to be a branch instruction"); + + const CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition()); + DEBUG_WITH_TYPE( + VerboseDebug, if (OuterLoopLatchCmp) { + dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp + << "\n"; + }); + + // Identify the inner loop guard instruction. + BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch(); + const CmpInst *InnerLoopGuardCmp = + (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr; + + DEBUG_WITH_TYPE( + VerboseDebug, if (InnerLoopGuardCmp) { + dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp + << "\n"; + }); + + // Determine whether instructions in a basic block are one of: + // - the inner loop guard comparison + // - the outer loop latch comparison + // - the outer loop induction variable increment + // - a phi node, a cast or a branch + auto containsOnlySafeInstructions = [&](const BasicBlock &BB) { + return llvm::all_of(BB, [&](const Instruction &I) { + bool isAllowed = isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) || + isa<BranchInst>(I); + if (!isAllowed) { + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Instruction: " << I << "\nin basic block: " << BB + << " is considered unsafe.\n"; + }); + return false; + } + + // The only binary instruction allowed is the outer loop step instruction, + // the only comparison instructions allowed are the inner loop guard + // compare instruction and the outer loop latch compare instruction. 
+ if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) || + (isa<CmpInst>(I) && &I != OuterLoopLatchCmp && + &I != InnerLoopGuardCmp)) { + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Instruction: " << I << "\nin basic block:" << BB + << "is unsafe.\n"; + }); + return false; + } + return true; + }); + }; + + // Check the code surrounding the inner loop for instructions that are deemed + // unsafe. + const BasicBlock *OuterLoopHeader = OuterLoop.getHeader(); + const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); + const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader(); + + if (!containsOnlySafeInstructions(*OuterLoopHeader) || + !containsOnlySafeInstructions(*OuterLoopLatch) || + (InnerLoopPreHeader != OuterLoopHeader && + !containsOnlySafeInstructions(*InnerLoopPreHeader)) || + !containsOnlySafeInstructions(*InnerLoop.getExitBlock())) { + LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is " + "unsafe\n";); + return false; + } + + LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '" + << InnerLoop.getName() << "' are perfectly nested.\n"); + + return true; +} + +SmallVector<LoopVectorTy, 4> +LoopNest::getPerfectLoops(ScalarEvolution &SE) const { + SmallVector<LoopVectorTy, 4> LV; + LoopVectorTy PerfectNest; + + for (Loop *L : depth_first(const_cast<Loop *>(Loops.front()))) { + if (PerfectNest.empty()) + PerfectNest.push_back(L); + + auto &SubLoops = L->getSubLoops(); + if (SubLoops.size() == 1 && arePerfectlyNested(*L, *SubLoops.front(), SE)) { + PerfectNest.push_back(SubLoops.front()); + } else { + LV.push_back(PerfectNest); + PerfectNest.clear(); + } + } + + return LV; +} + +unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) { + LLVM_DEBUG(dbgs() << "Get maximum perfect depth of loop nest rooted by loop '" + << Root.getName() << "'\n"); + + const Loop *CurrentLoop = &Root; + const auto *SubLoops = &CurrentLoop->getSubLoops(); + unsigned CurrentDepth = 1; + + while (SubLoops->size() == 1) { + const Loop *InnerLoop = SubLoops->front(); + if (!arePerfectlyNested(*CurrentLoop, *InnerLoop, SE)) { + LLVM_DEBUG({ + dbgs() << "Not a perfect nest: loop '" << CurrentLoop->getName() + << "' is not perfectly nested with loop '" + << InnerLoop->getName() << "'\n"; + }); + break; + } + + CurrentLoop = InnerLoop; + SubLoops = &CurrentLoop->getSubLoops(); + ++CurrentDepth; + } + + return CurrentDepth; +} + +static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE) { + // The inner loop must be the only outer loop's child. + if ((OuterLoop.getSubLoops().size() != 1) || + (InnerLoop.getParentLoop() != &OuterLoop)) + return false; + + // We expect loops in normal form which have a preheader, header, latch... + if (!OuterLoop.isLoopSimplifyForm() || !InnerLoop.isLoopSimplifyForm()) + return false; + + const BasicBlock *OuterLoopHeader = OuterLoop.getHeader(); + const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); + const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader(); + const BasicBlock *InnerLoopLatch = InnerLoop.getLoopLatch(); + const BasicBlock *InnerLoopExit = InnerLoop.getExitBlock(); + + // We expect rotated loops. The inner loop should have a single exit block. + if (OuterLoop.getExitingBlock() != OuterLoopLatch || + InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit) + return false; + + // Ensure the only branch that may exist between the loops is the inner loop + // guard. 
+ if (OuterLoopHeader != InnerLoopPreHeader) { + const BranchInst *BI = + dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + // The successors of the inner loop guard should be the inner loop + // preheader and the outer loop latch. + for (const BasicBlock *Succ : BI->successors()) { + if (Succ == InnerLoopPreHeader) + continue; + if (Succ == OuterLoopLatch) + continue; + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } + } + + // Ensure the inner loop exit block leads to the outer loop latch. + if (InnerLoopExit->getSingleSuccessor() != OuterLoopLatch) { + DEBUG_WITH_TYPE( + VerboseDebug, + dbgs() << "Inner loop exit block " << *InnerLoopExit + << " does not directly lead to the outer loop latch.\n";); + return false; + } + + return true; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) { + OS << "IsPerfect="; + if (LN.getMaxPerfectDepth() == LN.getNestDepth()) + OS << "true"; + else + OS << "false"; + OS << ", Depth=" << LN.getNestDepth(); + OS << ", OutermostLoop: " << LN.getOutermostLoop().getName(); + OS << ", Loops: ( "; + for (const Loop *L : LN.getLoops()) + OS << L->getName() << " "; + OS << ")"; + + return OS; +} + +//===----------------------------------------------------------------------===// +// LoopNestPrinterPass implementation +// + +PreservedAnalyses LoopNestPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + if (auto LN = LoopNest::getLoopNest(L, AR.SE)) + OS << *LN << "\n"; + + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp index 507f5f4428659..520f06003dd22 100644 --- a/llvm/lib/Analysis/LoopPass.cpp +++ b/llvm/lib/Analysis/LoopPass.cpp @@ -93,38 +93,6 @@ void LPPassManager::addLoop(Loop &L) { } } -/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for -/// all loop passes. -void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, - BasicBlock *To, Loop *L) { - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *LP = getContainedPass(Index); - LP->cloneBasicBlockAnalysis(From, To, L); - } -} - -/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes. -void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { - if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) { - for (Instruction &I : *BB) { - deleteSimpleAnalysisValue(&I, L); - } - } - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *LP = getContainedPass(Index); - LP->deleteAnalysisValue(V, L); - } -} - -/// Invoke deleteAnalysisLoop hook for all passes. -void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) { - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *LP = getContainedPass(Index); - LP->deleteAnalysisLoop(L); - } -} - - // Recurse through all subloops and all loops into LQ. static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { LQ.push_back(L); @@ -246,10 +214,7 @@ bool LPPassManager::runOnFunction(Function &F) { : CurrentLoop->getName()); dumpPreservedSet(P); - if (CurrentLoopDeleted) { - // Notify passes that the loop is being deleted. - deleteSimpleAnalysisLoop(CurrentLoop); - } else { + if (!CurrentLoopDeleted) { // Manually check that this loop is still healthy. 
This is done // instead of relying on LoopInfo::verifyLoop since LoopInfo // is a function pass and it's really expensive to verify every diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 762623de41e96..b04cc46bd2721 100644 --- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopUnrollAnalyzer.h" +#include "llvm/Analysis/LoopInfo.h" using namespace llvm; diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp new file mode 100644 index 0000000000000..45873f260f23d --- /dev/null +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -0,0 +1,301 @@ +//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface between the inliner and a learned model. +// It delegates model evaluation to either the AOT compiled model (the +// 'release' mode) or a runtime-loaded model (the 'development' case). +// +//===----------------------------------------------------------------------===// +#include <limits> +#include <unordered_map> +#include <unordered_set> + +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InlineFeaturesAnalysis.h" +#include "llvm/Analysis/MLInlineAdvisor.h" +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-ml" + +static cl::opt<float> SizeIncreaseThreshold( + "ml-advisor-size-increase-threshold", cl::Hidden, + cl::desc("Maximum factor by which expected native size may increase before " + "blocking any further inlining."), + cl::init(2.0)); + +const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{ +#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, + INLINE_FEATURE_ITERATOR(POPULATE_NAMES) +#undef POPULATE_NAMES +}; + +const char *const llvm::DecisionName = "inlining_decision"; +const char *const llvm::DefaultDecisionName = "inlining_default"; +const char *const llvm::RewardName = "delta_size"; + +CallBase *getInlinableCS(Instruction &I) { + if (auto *CS = dyn_cast<CallBase>(&I)) + if (Function *Callee = CS->getCalledFunction()) { + if (!Callee->isDeclaration()) { + return CS; + } + } + return nullptr; +} + +MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, + std::unique_ptr<MLModelRunner> Runner) + : InlineAdvisor( + MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), + M(M), ModelRunner(std::move(Runner)), CG(new CallGraph(M)), + InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { + assert(ModelRunner); + + // Extract the 'call site height' feature - the position of a call site + // relative to the farthest statically reachable SCC node. 
We don't mutate + // this value while inlining happens. Empirically, this feature proved + // critical in behavioral cloning - i.e. training a model to mimic the manual + // heuristic's decisions - and, thus, equally important for training for + // improvement. + for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { + const std::vector<CallGraphNode *> &CGNodes = *I; + unsigned Level = 0; + for (auto *CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (!F || F->isDeclaration()) + continue; + for (auto &I : instructions(F)) { + if (auto *CS = getInlinableCS(I)) { + auto *Called = CS->getCalledFunction(); + auto Pos = FunctionLevels.find(Called); + // In bottom up traversal, an inlinable callee is either in the + // same SCC, or to a function in a visited SCC. So not finding its + // level means we haven't visited it yet, meaning it's in this SCC. + if (Pos == FunctionLevels.end()) + continue; + Level = std::max(Level, Pos->second + 1); + } + } + } + for (auto *CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (F && !F->isDeclaration()) + FunctionLevels[F] = Level; + } + } +} + +void MLInlineAdvisor::onPassEntry() { + // Function passes executed between InlinerPass runs may have changed the + // module-wide features. + NodeCount = 0; + EdgeCount = 0; + for (auto &F : M) + if (!F.isDeclaration()) { + ++NodeCount; + EdgeCount += getLocalCalls(F); + } +} + +int64_t MLInlineAdvisor::getLocalCalls(Function &F) { + return FAM.getResult<InlineFeaturesAnalysis>(F).DirectCallsToDefinedFunctions; +} + +// Update the internal state of the advisor, and force invalidate feature +// analysis. Currently, we maintain minimal (and very simple) global state - the +// number of functions and the number of static calls. We also keep track of the +// total IR size in this module, to stop misbehaving policies at a certain bloat +// factor (SizeIncreaseThreshold) +void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, + bool CalleeWasDeleted) { + assert(!ForceStop); + Function *Caller = Advice.getCaller(); + Function *Callee = Advice.getCallee(); + + // The caller features aren't valid anymore. + FAM.invalidate<InlineFeaturesAnalysis>(*Caller); + int64_t IRSizeAfter = + getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); + CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); + if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) + ForceStop = true; + + // We can delta-update module-wide features. We know the inlining only changed + // the caller, and maybe the callee (by deleting the latter). + // Nodes are simple to update. + // For edges, we 'forget' the edges that the caller and callee used to have + // before inlining, and add back what they currently have together. 
+ int64_t NewCallerAndCalleeEdges = + FAM.getResult<InlineFeaturesAnalysis>(*Caller) + .DirectCallsToDefinedFunctions; + + if (CalleeWasDeleted) + --NodeCount; + else + NewCallerAndCalleeEdges += FAM.getResult<InlineFeaturesAnalysis>(*Callee) + .DirectCallsToDefinedFunctions; + EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); + assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); +} + +int64_t MLInlineAdvisor::getModuleIRSize() const { + int64_t Ret = 0; + for (auto &F : CG->getModule()) + if (!F.isDeclaration()) + Ret += getIRSize(F); + return Ret; +} + +std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdvice(CallBase &CB) { + auto &Caller = *CB.getCaller(); + auto &Callee = *CB.getCalledFunction(); + + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + + auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); + + auto TrivialDecision = + llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); + + // If this is a "never inline" case, there won't be any changes to internal + // state we need to track, so we can just return the base InlineAdvice, which + // will do nothing interesting. + // Same thing if this is a recursive case. + if ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) || + &Caller == &Callee) + return std::make_unique<InlineAdvice>(this, CB, ORE, false); + + bool Mandatory = TrivialDecision.hasValue() && TrivialDecision->isSuccess(); + + // If we need to stop, we won't want to track anymore any state changes, so + // we just return the base InlineAdvice, which acts as a noop. + if (ForceStop) { + ORE.emit([&] { + return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) + << "Won't attempt inlining because module size grew too much."; + }); + return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory); + } + + int CostEstimate = 0; + if (!Mandatory) { + auto IsCallSiteInlinable = + llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); + if (!IsCallSiteInlinable) { + // We can't inline this for correctness reasons, so return the base + // InlineAdvice, as we don't care about tracking any state changes (which + // won't happen). 
+ return std::make_unique<InlineAdvice>(this, CB, ORE, false); + } + CostEstimate = *IsCallSiteInlinable; + } + + if (Mandatory) + return getMandatoryAdvice(CB, ORE); + + auto NrCtantParams = 0; + for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { + NrCtantParams += (isa<Constant>(*I)); + } + + auto &CallerBefore = FAM.getResult<InlineFeaturesAnalysis>(Caller); + auto &CalleeBefore = FAM.getResult<InlineFeaturesAnalysis>(Callee); + + ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount, + CalleeBefore.BasicBlockCount); + ModelRunner->setFeature(FeatureIndex::CallSiteHeight, + FunctionLevels[&Caller]); + ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount); + ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams); + ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); + ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount); + ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses); + ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks, + CallerBefore.BlocksReachedFromConditionalInstruction); + ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount, + CallerBefore.BasicBlockCount); + ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks, + CalleeBefore.BlocksReachedFromConditionalInstruction); + ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses); + return getAdviceFromModel(CB, ORE); +} + +std::unique_ptr<MLInlineAdvice> +MLInlineAdvisor::getAdviceFromModel(CallBase &CB, + OptimizationRemarkEmitter &ORE) { + return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run()); +} + +std::unique_ptr<MLInlineAdvice> +MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, + OptimizationRemarkEmitter &ORE) { + return std::make_unique<MLInlineAdvice>(this, CB, ORE, true); +} + +void MLInlineAdvice::reportContextForRemark( + DiagnosticInfoOptimizationBase &OR) { + using namespace ore; + OR << NV("Callee", Callee->getName()); + for (size_t I = 0; I < NumberOfFeatures; ++I) + OR << NV(FeatureNameMap[I], getAdvisor()->getModelRunner().getFeature(I)); + OR << NV("ShouldInline", isInliningRecommended()); +} + +void MLInlineAdvice::recordInliningImpl() { + ORE.emit([&]() { + OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); + reportContextForRemark(R); + return R; + }); + getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); +} + +void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { + ORE.emit([&]() { + OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, + Block); + reportContextForRemark(R); + return R; + }); + getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); +} + +void MLInlineAdvice::recordUnsuccessfulInliningImpl( + const InlineResult &Result) { + ORE.emit([&]() { + OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", + DLoc, Block); + reportContextForRemark(R); + return R; + }); +} +void MLInlineAdvice::recordUnattemptedInliningImpl() { + ORE.emit([&]() { + OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); + reportContextForRemark(R); + return R; + }); +}
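The module-wide bookkeeping performed in onSuccessfulInlining above (delta-updating the node and edge counts instead of re-scanning the module) can be summarized with a small stand-alone sketch; the struct and function names below are hypothetical and only mirror the idea.

// Hedged sketch of the delta update after a successful inlining: forget the
// edges the caller and callee contributed before inlining, then add back
// whatever the surviving functions contribute now.
struct ModuleInlineStateSketch {
  long NodeCount = 0; // defined functions in the module
  long EdgeCount = 0; // direct calls to defined functions
};

static void applyInlineDelta(ModuleInlineStateSketch &S, bool CalleeWasDeleted,
                             long CallerAndCalleeEdgesBefore,
                             long CallerEdgesAfter, long CalleeEdgesAfter) {
  long NewEdges = CallerEdgesAfter;
  if (CalleeWasDeleted)
    --S.NodeCount; // the callee no longer exists as a graph node
  else
    NewEdges += CalleeEdgesAfter;
  S.EdgeCount += NewEdges - CallerAndCalleeEdgesBefore;
}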
\ No newline at end of file diff --git a/llvm/lib/Analysis/MemDepPrinter.cpp b/llvm/lib/Analysis/MemDepPrinter.cpp index 2c57e63251c66..9524ec96bb61a 100644 --- a/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/llvm/lib/Analysis/MemDepPrinter.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" @@ -17,6 +18,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp index 5d824067df539..564410b8af083 100644 --- a/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -8,12 +8,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 427e6fd3ace24..0b61b1c0eabd7 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -52,11 +52,12 @@ using namespace llvm; enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null - CallocLike = 1<<2, // allocates + bzero - ReallocLike = 1<<3, // reallocates - StrDupLike = 1<<4, - MallocOrCallocLike = MallocLike | CallocLike, - AllocLike = MallocLike | CallocLike | StrDupLike, + AlignedAllocLike = 1<<2, // allocates with alignment; may return null + CallocLike = 1<<3, // allocates + bzero + ReallocLike = 1<<4, // reallocates + StrDupLike = 1<<5, + MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, + AllocLike = MallocOrCallocLike | StrDupLike, AnyAlloc = AllocLike | ReallocLike }; @@ -100,6 +101,7 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long) {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) + {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1}}, {LibFunc_calloc, {CallocLike, 2, 0, 1}}, {LibFunc_realloc, {ReallocLike, 2, 1, -1}}, {LibFunc_reallocf, {ReallocLike, 2, 1, -1}}, @@ -117,13 +119,13 @@ static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast if (LookThroughBitCast) V = V->stripPointerCasts(); - ImmutableCallSite CS(V); - if (!CS.getInstruction()) + const auto *CB = dyn_cast<CallBase>(V); + if (!CB) return nullptr; - IsNoBuiltin = CS.isNoBuiltin(); + IsNoBuiltin = CB->isNoBuiltin(); - if (const Function *Callee = CS.getCalledFunction()) + if (const Function *Callee = CB->getCalledFunction()) return Callee; return nullptr; } @@ -225,8 +227,9 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, } static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { - ImmutableCallSite CS(LookThroughBitCast ? 
V->stripPointerCasts() : V); - return CS && CS.hasRetAttr(Attribute::NoAlias); + const auto *CB = + dyn_cast<CallBase>(LookThroughBitCast ? V->stripPointerCasts() : V); + return CB && CB->hasRetAttr(Attribute::NoAlias); } /// Tests if a value is a call or invoke to a library function that @@ -266,6 +269,20 @@ bool llvm::isMallocLikeFn( } /// Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory with alignment (such as aligned_alloc). +bool llvm::isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AlignedAllocLike, TLI, LookThroughBitCast) + .hasValue(); +} +bool llvm::isAlignedAllocLikeFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AlignedAllocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} + +/// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { @@ -439,7 +456,11 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow) - TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow) + TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow) + TLIFn == LibFunc_ZdlPvjSt11align_val_t || // delete(void*, unsigned long, align_val_t) + TLIFn == LibFunc_ZdlPvmSt11align_val_t || // delete(void*, unsigned long, align_val_t) + TLIFn == LibFunc_ZdaPvjSt11align_val_t || // delete[](void*, unsigned int, align_val_t) + TLIFn == LibFunc_ZdaPvmSt11align_val_t) // delete[](void*, unsigned long, align_val_t) ExpectedNumParams = 3; else return false; @@ -633,6 +654,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); + if (isa<ScalableVectorType>(I.getAllocatedType())) + return unknown(); + APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType())); if (!I.isArrayAllocation()) return std::make_pair(align(Size, I.getAlignment()), Zero); @@ -653,7 +677,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { // No interprocedural analysis is done at the moment. - if (!A.hasByValOrInAllocaAttr()) { + if (!A.hasPassPointeeByValueAttr()) { ++ObjectVisitorArgument; return unknown(); } @@ -662,21 +686,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { return std::make_pair(align(Size, A.getParamAlignment()), Zero); } -SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { - Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI); +SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { + Optional<AllocFnsTy> FnData = getAllocationSize(&CB, TLI); if (!FnData) return unknown(); // Handle strdup-like functions separately. if (FnData->AllocTy == StrDupLike) { - APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); + APInt Size(IntTyBits, GetStringLength(CB.getArgOperand(0))); if (!Size) return unknown(); // Strndup limits strlen. 
if (FnData->FstParam > 0) { ConstantInt *Arg = - dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + dyn_cast<ConstantInt>(CB.getArgOperand(FnData->FstParam)); if (!Arg) return unknown(); @@ -687,7 +711,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { return std::make_pair(Size, Zero); } - ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + ConstantInt *Arg = dyn_cast<ConstantInt>(CB.getArgOperand(FnData->FstParam)); if (!Arg) return unknown(); @@ -699,7 +723,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { if (FnData->SndParam < 0) return std::make_pair(Size, Zero); - Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam)); + Arg = dyn_cast<ConstantInt>(CB.getArgOperand(FnData->SndParam)); if (!Arg) return unknown(); @@ -927,8 +951,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { return std::make_pair(Size, Zero); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { - Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI); +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) { + Optional<AllocFnsTy> FnData = getAllocationSize(&CB, TLI); if (!FnData) return unknown(); @@ -938,12 +962,12 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { return unknown(); } - Value *FirstArg = CS.getArgument(FnData->FstParam); + Value *FirstArg = CB.getArgOperand(FnData->FstParam); FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy); if (FnData->SndParam < 0) return std::make_pair(FirstArg, Zero); - Value *SecondArg = CS.getArgument(FnData->SndParam); + Value *SecondArg = CB.getArgOperand(FnData->SndParam); SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy); Value *Size = Builder.CreateMul(FirstArg, SecondArg); return std::make_pair(Size, Zero); diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index a97a56e258050..566eba5c54aff 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -238,83 +237,6 @@ MemDepResult MemoryDependenceResults::getCallDependencyFrom( return MemDepResult::getNonFuncLocal(); } -unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( - const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, - const LoadInst *LI) { - // We can only extend simple integer loads. - if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) - return 0; - - // Load widening is hostile to ThreadSanitizer: it may cause false positives - // or make the reports more cryptic (access sizes are wrong). - if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) - return 0; - - const DataLayout &DL = LI->getModule()->getDataLayout(); - - // Get the base of this load. - int64_t LIOffs = 0; - const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL); - - // If the two pointers are not based on the same pointer, we can't tell that - // they are related. - if (LIBase != MemLocBase) - return 0; - - // Okay, the two values are based on the same pointer, but returned as - // no-alias. 
This happens when we have things like two byte loads at "P+1" - // and "P+3". Check to see if increasing the size of the "LI" load up to its - // alignment (or the largest native integer type) will allow us to load all - // the bits required by MemLoc. - - // If MemLoc is before LI, then no widening of LI will help us out. - if (MemLocOffs < LIOffs) - return 0; - - // Get the alignment of the load in bytes. We assume that it is safe to load - // any legal integer up to this size without a problem. For example, if we're - // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can - // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it - // to i16. - unsigned LoadAlign = LI->getAlignment(); - - int64_t MemLocEnd = MemLocOffs + MemLocSize; - - // If no amount of rounding up will let MemLoc fit into LI, then bail out. - if (LIOffs + LoadAlign < MemLocEnd) - return 0; - - // This is the size of the load to try. Start with the next larger power of - // two. - unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U; - NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - - while (true) { - // If this load size is bigger than our known alignment or would not fit - // into a native integer register, then we fail. - if (NewLoadByteSize > LoadAlign || - !DL.fitsInLegalInteger(NewLoadByteSize * 8)) - return 0; - - if (LIOffs + NewLoadByteSize > MemLocEnd && - (LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress) || - LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeHWAddress))) - // We will be reading past the location accessed by the original program. - // While this is safe in a regular build, Address Safety analysis tools - // may start reporting false warnings. So, don't do widening. - return 0; - - // If a load of this width would include all of MemLoc, then we succeed. 
- if (LIOffs + NewLoadByteSize >= MemLocEnd) - return NewLoadByteSize; - - NewLoadByteSize <<= 1; - } -} - static bool isVolatile(Instruction *Inst) { if (auto *LI = dyn_cast<LoadInst>(Inst)) return LI->isVolatile(); @@ -327,8 +249,7 @@ static bool isVolatile(Instruction *Inst) { MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, - OrderedBasicBlock *OBB) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { @@ -339,7 +260,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( } } MemDepResult SimpleDep = getSimplePointerDependencyFrom( - MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB); + MemLoc, isLoad, ScanIt, BB, QueryInst, Limit); if (SimpleDep.isDef()) return SimpleDep; // Non-local invariant group dependency indicates there is non local Def @@ -440,8 +361,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, - OrderedBasicBlock *OBB) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { bool isInvariantLoad = false; unsigned DefaultLimit = getDefaultBlockScanLimit(); @@ -488,15 +408,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const DataLayout &DL = BB->getModule()->getDataLayout(); - // If the caller did not provide an ordered basic block, - // create one to lazily compute and cache instruction - // positions inside a BB. This is used to provide fast queries for relative - // position between two instructions in a BB and can be used by - // AliasAnalysis::callCapturesBefore. - OrderedBasicBlock OBBTmp(BB); - if (!OBB) - OBB = &OBBTmp; - // Return "true" if and only if the instruction I is either a non-simple // load or a non-simple store. auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool { @@ -686,7 +597,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. if (isModAndRefSet(MR)) - MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB); + MR = AA.callCapturesBefore(Inst, MemLoc, &DT); switch (clearMust(MR)) { case ModRefInfo::NoModRef: // If the call has no effect on the queried pointer, just ignore it. 
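For callers of this interface elsewhere in the tree: callCapturesBefore now takes only the dominator tree, since the OrderedBasicBlock parameter has been dropped and intra-block ordering is handled internally. A hedged sketch of the updated call pattern (the wrapper and its name are illustrative, not from the patch):

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/Dominators.h"

  // Illustrative wrapper, assuming the caller already has AAResults, a
  // DominatorTree and a MemoryLocation in scope.
  static llvm::ModRefInfo refineCallModRef(llvm::AAResults &AA,
                                           llvm::DominatorTree &DT,
                                           const llvm::Instruction *Call,
                                           const llvm::MemoryLocation &MemLoc) {
    llvm::ModRefInfo MR = AA.getModRefInfo(Call, MemLoc);
    // No OrderedBasicBlock needs to be threaded through any more; only the
    // dominator tree is passed when refining a mod/ref result.
    if (llvm::isModAndRefSet(MR))
      MR = AA.callCapturesBefore(Call, MemLoc, &DT);
    return MR;
  }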
@@ -712,8 +623,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( return MemDepResult::getNonFuncLocal(); } -MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst, - OrderedBasicBlock *OBB) { +MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; // Check for a cached result @@ -753,7 +663,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst, LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(), - QueryParent, QueryInst, nullptr, OBB); + QueryParent, QueryInst, nullptr); } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) { bool isReadOnly = AA.onlyReadsMemory(QueryCall); LocalCache = getCallDependencyFrom(QueryCall, isReadOnly, @@ -979,6 +889,11 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock( Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { + bool isInvariantLoad = false; + + if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst)) + isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load); + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = std::upper_bound( @@ -990,6 +905,13 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock( if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; + // Use cached result for invariant load only if there is no dependency for non + // invariant load. In this case invariant load can not have any dependency as + // well. + if (ExistingResult && isInvariantLoad && + !ExistingResult->getResult().isNonFuncLocal()) + ExistingResult = nullptr; + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { @@ -1018,6 +940,10 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock( MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst); + // Don't cache results for invariant load. + if (isInvariantLoad) + return Dep; + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) @@ -1094,7 +1020,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepResult> &Result, - DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) { + DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock, + bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -1106,6 +1033,10 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( InitialNLPI.Size = Loc.Size; InitialNLPI.AATags = Loc.AATags; + bool isInvariantLoad = false; + if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst)) + isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load); + // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = @@ -1114,7 +1045,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // If we already have a cache entry for this CacheKey, we may need to do some // work to reconcile the cache entry and the current query. 
- if (!Pair.second) { + // Invariant loads don't participate in caching. Thus no need to reconcile. + if (!isInvariantLoad && !Pair.second) { if (CacheInfo->Size != Loc.Size) { bool ThrowOutEverything; if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) { @@ -1138,12 +1070,16 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( if (Instruction *Inst = Entry.getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); CacheInfo->NonLocalDeps.clear(); + // The cache is cleared (in the above line) so we will have lost + // information about blocks we have already visited. We therefore must + // assume that the cache information is incomplete. + IsIncomplete = true; } else { // This query's Size is less than the cached one. Conservatively restart // the query using the greater size. return getNonLocalPointerDepFromBB( QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, - StartBB, Result, Visited, SkipFirstBlock); + StartBB, Result, Visited, SkipFirstBlock, IsIncomplete); } } @@ -1158,11 +1094,15 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( if (Instruction *Inst = Entry.getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); CacheInfo->NonLocalDeps.clear(); + // The cache is cleared (in the above line) so we will have lost + // information about blocks we have already visited. We therefore must + // assume that the cache information is incomplete. + IsIncomplete = true; } if (Loc.AATags) return getNonLocalPointerDepFromBB( QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, - Visited, SkipFirstBlock); + Visited, SkipFirstBlock, IsIncomplete); } } @@ -1170,7 +1110,13 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // If we have valid cached information for exactly the block we are // investigating, just return it with no recomputation. - if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { + // Don't use cached information for invariant loads since it is valid for + // non-invariant loads only. + // + // Don't use cached information for invariant loads since it is valid for + // non-invariant loads only. + if (!IsIncomplete && !isInvariantLoad && + CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { // We have a fully cached result for this query then we can just return the // cached results and populate the visited set. However, we have to verify // that we don't already have conflicting results for these blocks. Check @@ -1207,13 +1153,18 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( } // Otherwise, either this is a new block, a block with an invalid cache - // pointer or one that we're about to invalidate by putting more info into it - // than its valid cache info. If empty, the result will be valid cache info, - // otherwise it isn't. - if (Cache->empty()) - CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); - else - CacheInfo->Pair = BBSkipFirstBlockPair(); + // pointer or one that we're about to invalidate by putting more info into + // it than its valid cache info. If empty and not explicitly indicated as + // incomplete, the result will be valid cache info, otherwise it isn't. + // + // Invariant loads don't affect cache in any way thus no need to update + // CacheInfo as well. 
+ if (!isInvariantLoad) { + if (!IsIncomplete && Cache->empty()) + CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + else + CacheInfo->Pair = BBSkipFirstBlockPair(); + } SmallVector<BasicBlock *, 32> Worklist; Worklist.push_back(StartBB); @@ -1454,22 +1405,27 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( if (SkipFirstBlock) return false; - bool foundBlock = false; - for (NonLocalDepEntry &I : llvm::reverse(*Cache)) { - if (I.getBB() != BB) - continue; + // Results of invariant loads are not cached thus no need to update cached + // information. + if (!isInvariantLoad) { + for (NonLocalDepEntry &I : llvm::reverse(*Cache)) { + if (I.getBB() != BB) + continue; - assert((GotWorklistLimit || I.getResult().isNonLocal() || - !DT.isReachableFromEntry(BB)) && - "Should only be here with transparent block"); - foundBlock = true; - I.setResult(MemDepResult::getUnknown()); - Result.push_back( - NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr())); - break; + assert((GotWorklistLimit || I.getResult().isNonLocal() || + !DT.isReachableFromEntry(BB)) && + "Should only be here with transparent block"); + + I.setResult(MemDepResult::getUnknown()); + + + break; + } } - (void)foundBlock; (void)GotWorklistLimit; - assert((foundBlock || GotWorklistLimit) && "Current block not in cache?"); + (void)GotWorklistLimit; + // Go ahead and report unknown dependence. + Result.push_back( + NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr())); } // Okay, we're done now. If we added new values to the cache, re-sort it. @@ -1562,15 +1518,25 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { LocalDeps.erase(LocalDepEntry); } - // If we have any cached pointer dependencies on this instruction, remove - // them. If the instruction has non-pointer type, then it can't be a pointer - // base. + // If we have any cached dependencies on this instruction, remove + // them. - // Remove it from both the load info and the store info. The instruction - // can't be in either of these maps if it is non-pointer. + // If the instruction is a pointer, remove it from both the load info and the + // store info. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } else { + // Otherwise, if the instructions is in the map directly, it must be a load. + // Remove it. + auto toRemoveIt = NonLocalDefsCache.find(RemInst); + if (toRemoveIt != NonLocalDefsCache.end()) { + assert(isa<LoadInst>(RemInst) && + "only load instructions should be added directly"); + const Instruction *DepV = toRemoveIt->second.getResult().getInst(); + ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst); + NonLocalDefsCache.erase(toRemoveIt); + } } // Loop over all of the things that depend on the instruction we're removing. 
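The recurring invariant-load special cases in the hunks above all key off the same metadata test. A minimal sketch of that test, factored into a hypothetical helper purely for illustration:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"

  // Hypothetical helper (not part of the patch) mirroring the check used
  // above: only loads tagged with !invariant.load bypass the dependency caches.
  static bool loadIsInvariant(const llvm::Instruction *QueryInst) {
    if (const auto *LI = llvm::dyn_cast_or_null<llvm::LoadInst>(QueryInst))
      return LI->getMetadata(llvm::LLVMContext::MD_invariant_load) != nullptr;
    return false;
  }

Results for such loads are recomputed on every query instead of being written back, which is why the cache-update paths above are guarded by !isInvariantLoad.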
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 103cdea148e5e..4c31d6786ed8e 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -83,6 +83,23 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { AATags); } +Optional<MemoryLocation> MemoryLocation::getOrNone(const Instruction *Inst) { + switch (Inst->getOpcode()) { + case Instruction::Load: + return get(cast<LoadInst>(Inst)); + case Instruction::Store: + return get(cast<StoreInst>(Inst)); + case Instruction::VAArg: + return get(cast<VAArgInst>(Inst)); + case Instruction::AtomicCmpXchg: + return get(cast<AtomicCmpXchgInst>(Inst)); + case Instruction::AtomicRMW: + return get(cast<AtomicRMWInst>(Inst)); + default: + return None; + } +} + MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { return getForSource(cast<AnyMemTransferInst>(MTI)); } diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index bf8dc94bfbf94..f2f5fd70f4718 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -167,7 +167,7 @@ public: if (!IsCall) return Loc == Other.Loc; - if (Call->getCalledValue() != Other.Call->getCalledValue()) + if (Call->getCalledOperand() != Other.Call->getCalledOperand()) return false; return Call->arg_size() == Other.Call->arg_size() && @@ -203,7 +203,7 @@ template <> struct DenseMapInfo<MemoryLocOrCall> { hash_code hash = hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( - MLOC.getCall()->getCalledValue())); + MLOC.getCall()->getCalledOperand())); for (const Value *Arg : MLOC.getCall()->args()) hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); @@ -466,7 +466,8 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, assert(isa<MemoryPhi>(MA)); Worklist.append( - upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}), + upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}, + MSSA.getDomTree()), upward_defs_end()); } } @@ -595,8 +596,8 @@ template <class AliasAnalysisType> class ClobberWalker { void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, ListIndex PriorNode) { - auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), - upward_defs_end()); + auto UpwardDefs = make_range( + upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT), upward_defs_end()); for (const MemoryAccessPair &P : UpwardDefs) { PausedSearches.push_back(Paths.size()); Paths.emplace_back(P.second, P.first, PriorNode); @@ -2298,7 +2299,10 @@ bool MemorySSAWrapperPass::runOnFunction(Function &F) { return false; } -void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } +void MemorySSAWrapperPass::verifyAnalysis() const { + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); +} void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { MSSA->print(OS); diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 473268982f2df..85af091772e7e 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -10,11 +10,13 @@ // //===----------------------------------------------------------------===// #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/MemorySSA.h" 
+#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -781,24 +783,24 @@ void MemorySSAUpdater::updateExitBlocksForClonedLoop( void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT) { - SmallVector<CFGUpdate, 4> RevDeleteUpdates; + SmallVector<CFGUpdate, 4> DeleteUpdates; SmallVector<CFGUpdate, 4> InsertUpdates; for (auto &Update : Updates) { if (Update.getKind() == DT.Insert) InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); else - RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); + DeleteUpdates.push_back({DT.Delete, Update.getFrom(), Update.getTo()}); } - if (!RevDeleteUpdates.empty()) { + if (!DeleteUpdates.empty()) { // Update for inserted edges: use newDT and snapshot CFG as if deletes had // not occurred. // FIXME: This creates a new DT, so it's more expensive to do mix // delete/inserts vs just inserts. We can do an incremental update on the DT // to revert deletes, than re-delete the edges. Teaching DT to do this, is // part of a pending cleanup. - DominatorTree NewDT(DT, RevDeleteUpdates); - GraphDiff<BasicBlock *> GD(RevDeleteUpdates); + DominatorTree NewDT(DT, DeleteUpdates); + GraphDiff<BasicBlock *> GD(DeleteUpdates, /*ReverseApplyUpdates=*/true); applyInsertUpdates(InsertUpdates, NewDT, &GD); } else { GraphDiff<BasicBlock *> GD; @@ -806,7 +808,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, } // Update for deleted edges - for (auto &Update : RevDeleteUpdates) + for (auto &Update : DeleteUpdates) removeEdge(Update.getFrom(), Update.getTo()); } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 8a1206f49c21d..e7d529d0b51e9 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -25,10 +25,10 @@ #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -83,7 +83,7 @@ cl::opt<std::string> ModuleSummaryDotFile( // to know when computing summary for global var, because if global variable // references basic block address we can't import it separately from function // containing that basic block. For simplicity we currently don't import such -// global vars at all. When importing function we aren't interested if any +// global vars at all. When importing function we aren't interested if any // instruction in it takes an address of any basic block, because instruction // can only take an address of basic block located in the same function. static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, @@ -99,7 +99,7 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, if (!Visited.insert(U).second) continue; - ImmutableCallSite CS(U); + const auto *CB = dyn_cast<CallBase>(U); for (const auto &OI : U->operands()) { const User *Operand = dyn_cast<User>(OI); @@ -113,7 +113,7 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, // We have a reference to a global value. This should be added to // the reference set unless it is a callee. 
Callees are handled // specially by WriteFunction and are added to a separate list. - if (!(CS && CS.isCallee(&OI))) + if (!(CB && CB->isCallee(&OI))) RefEdges.insert(Index.getOrInsertValueInfo(GV)); continue; } @@ -145,7 +145,7 @@ static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid, SetVector<FunctionSummary::ConstVCall> &ConstVCalls) { std::vector<uint64_t> Args; // Start from the second argument to skip the "this" pointer. - for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) { + for (auto &Arg : make_range(Call.CB.arg_begin() + 1, Call.CB.arg_end())) { auto *CI = dyn_cast<ConstantInt>(Arg); if (!CI || CI->getBitWidth() > 64) { VCalls.insert({Guid, Call.Offset}); @@ -239,12 +239,12 @@ static bool isNonVolatileStore(const Instruction *I) { return false; } -static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, - const Function &F, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, DominatorTree &DT, - bool HasLocalsInUsedOrAsm, - DenseSet<GlobalValue::GUID> &CantBePromoted, - bool IsThinLTO) { +static void computeFunctionSummary( + ModuleSummaryIndex &Index, const Module &M, const Function &F, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, + bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted, + bool IsThinLTO, + std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { // Summary not currently supported for anonymous functions, they should // have been named. assert(F.hasName()); @@ -304,8 +304,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } } findRefEdges(Index, &I, RefEdges, Visited); - auto CS = ImmutableCallSite(&I); - if (!CS) + const auto *CB = dyn_cast<CallBase>(&I); + if (!CB) continue; const auto *CI = dyn_cast<CallInst>(&I); @@ -317,8 +317,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) HasInlineAsmMaybeReferencingInternal = true; - auto *CalledValue = CS.getCalledValue(); - auto *CalledFunction = CS.getCalledFunction(); + auto *CalledValue = CB->getCalledOperand(); + auto *CalledFunction = CB->getCalledFunction(); if (CalledValue && !CalledFunction) { CalledValue = CalledValue->stripPointerCasts(); // Stripping pointer casts can reveal a called function. @@ -341,7 +341,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } // We should have named any anonymous globals assert(CalledFunction->hasName()); - auto ScaledCount = PSI->getProfileCount(&I, BFI); + auto ScaledCount = PSI->getProfileCount(*CB, BFI); auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) @@ -391,6 +391,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, .updateHotness(getHotness(Candidate.Count, PSI)); } } + Index.addBlockCount(F.size()); std::vector<ValueInfo> Refs; if (IsThinLTO) { @@ -469,12 +470,15 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // Don't try to import functions with noinline attribute. 
F.getAttributes().hasFnAttribute(Attribute::NoInline), F.hasFnAttribute(Attribute::AlwaysInline)}; + std::vector<FunctionSummary::ParamAccess> ParamAccesses; + if (auto *SSI = GetSSICallback(F)) + ParamAccesses = SSI->getParamAccesses(); auto FuncSummary = std::make_unique<FunctionSummary>( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), - TypeCheckedLoadConstVCalls.takeVector()); + TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses)); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); Index.addGlobalValueSummary(F, std::move(FuncSummary)); @@ -599,7 +603,10 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, bool CanBeInternalized = !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); - GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); + bool Constant = V.isConstant(); + GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, + Constant ? false : CanBeInternalized, + Constant, V.getVCallVisibility()); auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -640,7 +647,8 @@ static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { ModuleSummaryIndex llvm::buildModuleSummaryIndex( const Module &M, std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, + std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { assert(PSI); bool EnableSplitLTOUnit = false; if (auto *MD = mdconst::extract_or_null<ConstantInt>( @@ -713,12 +721,16 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( ArrayRef<FunctionSummary::VFuncId>{}, ArrayRef<FunctionSummary::VFuncId>{}, ArrayRef<FunctionSummary::ConstVCall>{}, - ArrayRef<FunctionSummary::ConstVCall>{}); + ArrayRef<FunctionSummary::ConstVCall>{}, + ArrayRef<FunctionSummary::ParamAccess>{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr<GlobalVarSummary> Summary = std::make_unique<GlobalVarSummary>( - GVFlags, GlobalVarSummary::GVarFlags(false, false), + GVFlags, + GlobalVarSummary::GVarFlags( + false, false, cast<GlobalVariable>(GV)->isConstant(), + GlobalObject::VCallVisibilityPublic), ArrayRef<ValueInfo>{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); } @@ -750,7 +762,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( computeFunctionSummary(Index, M, F, BFI, PSI, DT, !LocalsUsed.empty() || HasLocalInlineAsmSymbol, - CantBePromoted, IsThinLTO); + CantBePromoted, IsThinLTO, GetSSICallback); } // Compute summaries for all variables defined in module, and save in the @@ -832,13 +844,19 @@ ModuleSummaryIndex ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + bool NeedSSI = needsParamAccessSummary(M); return buildModuleSummaryIndex( M, [&FAM](const Function &F) { return &FAM.getResult<BlockFrequencyAnalysis>( *const_cast<Function *>(&F)); }, - &PSI); + &PSI, + [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * { + return NeedSSI ? 
&FAM.getResult<StackSafetyAnalysis>( + const_cast<Function &>(F)) + : nullptr; + }); } char ModuleSummaryIndexWrapperPass::ID = 0; @@ -847,6 +865,7 @@ INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) @@ -861,6 +880,7 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + bool NeedSSI = needsParamAccessSummary(M); Index.emplace(buildModuleSummaryIndex( M, [this](const Function &F) { @@ -868,7 +888,13 @@ bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { *const_cast<Function *>(&F)) .getBFI()); }, - PSI)); + PSI, + [&](const Function &F) -> const StackSafetyInfo * { + return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>( + const_cast<Function &>(F)) + .getResult() + : nullptr; + })); return false; } @@ -881,4 +907,27 @@ void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<StackSafetyInfoWrapperPass>(); +} + +char ImmutableModuleSummaryIndexWrapperPass::ID = 0; + +ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass( + const ModuleSummaryIndex *Index) + : ImmutablePass(ID), Index(Index) { + initializeImmutableModuleSummaryIndexWrapperPassPass( + *PassRegistry::getPassRegistry()); } + +void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass( + const ModuleSummaryIndex *Index) { + return new ImmutableModuleSummaryIndexWrapperPass(Index); +} + +INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info", + "Module summary info", false, true) diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index 952c2cbfec4e0..6e3ff67bdddb9 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -357,23 +357,29 @@ ModulePass *llvm::createMustBeExecutedContextPrinter() { bool MustBeExecutedContextPrinter::runOnModule(Module &M) { // We provide non-PM analysis here because the old PM doesn't like to query // function passes from a module pass. 
- SmallVector<PostDominatorTree *, 8> PDTs; - SmallVector<DominatorTree *, 8> DTs; - SmallVector<LoopInfo *, 8> LIs; + SmallVector<std::unique_ptr<PostDominatorTree>, 8> PDTs; + SmallVector<std::unique_ptr<DominatorTree>, 8> DTs; + SmallVector<std::unique_ptr<LoopInfo>, 8> LIs; GetterTy<LoopInfo> LIGetter = [&](const Function &F) { - DominatorTree *DT = new DominatorTree(const_cast<Function &>(F)); - LoopInfo *LI = new LoopInfo(*DT); - DTs.push_back(DT); - LIs.push_back(LI); - return LI; + DTs.push_back(std::make_unique<DominatorTree>(const_cast<Function &>(F))); + LIs.push_back(std::make_unique<LoopInfo>(*DTs.back())); + return LIs.back().get(); + }; + GetterTy<DominatorTree> DTGetter = [&](const Function &F) { + DTs.push_back(std::make_unique<DominatorTree>(const_cast<Function&>(F))); + return DTs.back().get(); }; GetterTy<PostDominatorTree> PDTGetter = [&](const Function &F) { - PostDominatorTree *PDT = new PostDominatorTree(const_cast<Function &>(F)); - PDTs.push_back(PDT); - return PDT; + PDTs.push_back( + std::make_unique<PostDominatorTree>(const_cast<Function &>(F))); + return PDTs.back().get(); }; - MustBeExecutedContextExplorer Explorer(true, LIGetter, PDTGetter); + MustBeExecutedContextExplorer Explorer( + /* ExploreInterBlock */ true, + /* ExploreCFGForward */ true, + /* ExploreCFGBackward */ true, LIGetter, DTGetter, PDTGetter); + for (Function &F : M) { for (Instruction &I : instructions(F)) { dbgs() << "-- Explore context of: " << I << "\n"; @@ -383,9 +389,6 @@ bool MustBeExecutedContextPrinter::runOnModule(Module &M) { } } - DeleteContainerPointers(PDTs); - DeleteContainerPointers(LIs); - DeleteContainerPointers(DTs); return false; } @@ -475,13 +478,13 @@ static bool maybeEndlessLoop(const Loop &L) { return true; } -static bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI) { +bool llvm::mayContainIrreducibleControl(const Function &F, const LoopInfo *LI) { if (!LI) return false; using RPOTraversal = ReversePostOrderTraversal<const Function *>; RPOTraversal FuncRPOT(&F); - return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, - const LoopInfo>(FuncRPOT, *LI); + return containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, + const LoopInfo>(FuncRPOT, *LI); } /// Lookup \p Key in \p Map and return the result, potentially after @@ -632,6 +635,72 @@ MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) { LLVM_DEBUG(dbgs() << "\tJoin block: " << JoinBB->getName() << "\n"); return JoinBB; } +const BasicBlock * +MustBeExecutedContextExplorer::findBackwardJoinPoint(const BasicBlock *InitBB) { + const LoopInfo *LI = LIGetter(*InitBB->getParent()); + const DominatorTree *DT = DTGetter(*InitBB->getParent()); + LLVM_DEBUG(dbgs() << "\tFind backward join point for " << InitBB->getName() + << (LI ? " [LI]" : "") << (DT ? " [DT]" : "")); + + // Try to determine a join block through the help of the dominance tree. If no + // tree was provided, we perform simple pattern matching for one block + // conditionals only. + if (DT) + if (const auto *InitNode = DT->getNode(InitBB)) + if (const auto *IDomNode = InitNode->getIDom()) + return IDomNode->getBlock(); + + const Loop *L = LI ? LI->getLoopFor(InitBB) : nullptr; + const BasicBlock *HeaderBB = L ? L->getHeader() : nullptr; + + // Determine the predecessor blocks but ignore backedges. 
+ SmallVector<const BasicBlock *, 8> Worklist; + for (const BasicBlock *PredBB : predecessors(InitBB)) { + bool IsBackedge = + (PredBB == InitBB) || (HeaderBB == InitBB && L->contains(PredBB)); + // Loop backedges are ignored in backwards propagation: control has to come + // from somewhere. + if (!IsBackedge) + Worklist.push_back(PredBB); + } + + // If there are no other predecessor blocks, there is no join point. + if (Worklist.empty()) + return nullptr; + + // If there is one predecessor block, it is the join point. + if (Worklist.size() == 1) + return Worklist[0]; + + const BasicBlock *JoinBB = nullptr; + if (Worklist.size() == 2) { + const BasicBlock *Pred0 = Worklist[0]; + const BasicBlock *Pred1 = Worklist[1]; + const BasicBlock *Pred0UniquePred = Pred0->getUniquePredecessor(); + const BasicBlock *Pred1UniquePred = Pred1->getUniquePredecessor(); + if (Pred0 == Pred1UniquePred) { + // InitBB <- Pred0 = JoinBB + // InitBB <- Pred1 <- Pred0 = JoinBB + JoinBB = Pred0; + } else if (Pred1 == Pred0UniquePred) { + // InitBB <- Pred0 <- Pred1 = JoinBB + // InitBB <- Pred1 = JoinBB + JoinBB = Pred1; + } else if (Pred0UniquePred == Pred1UniquePred) { + // InitBB <- Pred0 <- JoinBB + // InitBB <- Pred1 <- JoinBB + JoinBB = Pred0UniquePred; + } + } + + if (!JoinBB && L) + JoinBB = L->getHeader(); + + // In backwards direction there is no need to show termination of previous + // instructions. If they do not terminate, the code afterward is dead, making + // any information/transformation correct anyway. + return JoinBB; +} const Instruction * MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( @@ -690,6 +759,47 @@ MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( return nullptr; } +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedPrevInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + + bool IsFirst = !(PP->getPrevNode()); + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP + << (IsFirst ? " [IsFirst]" : "") << "\n"); + + // If we explore only inside a given basic block we stop at the first + // instruction. + if (!ExploreInterBlock && IsFirst) { + LLVM_DEBUG(dbgs() << "\tReached block front in intra-block mode, done\n"); + return nullptr; + } + + // The block and function that contains the current position. + const BasicBlock *PPBlock = PP->getParent(); + + // If we are inside a block we know what instruction was executed before, the + // previous one. + if (!IsFirst) { + const Instruction *PrevPP = PP->getPrevNode(); + LLVM_DEBUG( + dbgs() << "\tIntermediate instruction, continue with previous\n"); + // We did not enter a callee so we simply return the previous instruction. + return PrevPP; + } + + // Finally, we have to handle the case where the program point is the first in + // a block but not in the function. We use the findBackwardJoinPoint helper + // function with information about the function and helper analyses, if + // available. 
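Stepping outside this helper for a moment: together with the three-flag constructor used in the printer pass earlier in this file, a client that wants the bidirectional context of an instruction might look roughly like the sketch below. The range() iteration is an assumption about the surrounding explorer API that this hunk does not show; the getter callbacks are the ones built in MustBeExecutedContextPrinter above (the getMustBeExecutedPrevInstruction hunk continues below).

  #include "llvm/Analysis/MustExecute.h"
  #include "llvm/Support/Debug.h"

  // Sketch under assumptions: Explorer.range(&I) is taken to yield the
  // must-be-executed context of I, now explored in both CFG directions.
  static void dumpMustBeExecutedContext(
      llvm::MustBeExecutedContextExplorer &Explorer, const llvm::Instruction &I) {
    for (const llvm::Instruction *CI : Explorer.range(&I))
      llvm::dbgs() << "  in context: " << *CI << "\n";
  }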
+ if (const BasicBlock *JoinBB = findBackwardJoinPoint(PPBlock)) + return &JoinBB->back(); + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + MustBeExecutedIterator::MustBeExecutedIterator( MustBeExecutedContextExplorer &Explorer, const Instruction *I) : Explorer(Explorer), CurInst(I) { @@ -697,16 +807,31 @@ MustBeExecutedIterator::MustBeExecutedIterator( } void MustBeExecutedIterator::reset(const Instruction *I) { - CurInst = I; Visited.clear(); - Visited.insert(I); + resetInstruction(I); +} + +void MustBeExecutedIterator::resetInstruction(const Instruction *I) { + CurInst = I; + Head = Tail = nullptr; + Visited.insert({I, ExplorationDirection::FORWARD}); + Visited.insert({I, ExplorationDirection::BACKWARD}); + if (Explorer.ExploreCFGForward) + Head = I; + if (Explorer.ExploreCFGBackward) + Tail = I; } const Instruction *MustBeExecutedIterator::advance() { assert(CurInst && "Cannot advance an end iterator!"); - const Instruction *Next = - Explorer.getMustBeExecutedNextInstruction(*this, CurInst); - if (Next && !Visited.insert(Next).second) - Next = nullptr; - return Next; + Head = Explorer.getMustBeExecutedNextInstruction(*this, Head); + if (Head && Visited.insert({Head, ExplorationDirection ::FORWARD}).second) + return Head; + Head = nullptr; + + Tail = Explorer.getMustBeExecutedPrevInstruction(*this, Tail); + if (Tail && Visited.insert({Tail, ExplorationDirection ::BACKWARD}).second) + return Tail; + Tail = nullptr; + return nullptr; } diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index 811033e731477..80e019f5fc921 100644 --- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -24,12 +24,12 @@ #include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" -#include "llvm/PassAnalysisSupport.h" -#include "llvm/PassSupport.h" +#include "llvm/Pass.h" #define DEBUG_TYPE "objc-arc-aa" diff --git a/llvm/lib/Analysis/ObjCARCInstKind.cpp b/llvm/lib/Analysis/ObjCARCInstKind.cpp index 0e96c6e975c99..fb416a79ac26e 100644 --- a/llvm/lib/Analysis/ObjCARCInstKind.cpp +++ b/llvm/lib/Analysis/ObjCARCInstKind.cpp @@ -153,7 +153,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { } } -// A whitelist of intrinsics that we know do not use objc pointers or decrement +// A list of intrinsics that we know do not use objc pointers or decrement // ref counts. static bool isInertIntrinsic(unsigned ID) { // TODO: Make this into a covered switch. @@ -192,7 +192,7 @@ static bool isInertIntrinsic(unsigned ID) { } } -// A whitelist of intrinsics that we know do not use objc pointers or decrement +// A list of intrinsics that we know do not use objc pointers or decrement // ref counts. static bool isUseOnlyIntrinsic(unsigned ID) { // We are conservative and even though intrinsics are unlikely to touch @@ -234,11 +234,11 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { } // Otherwise, be conservative. - return GetCallSiteClass(CI); + return GetCallSiteClass(*CI); } case Instruction::Invoke: // Otherwise, be conservative. 
- return GetCallSiteClass(cast<InvokeInst>(I)); + return GetCallSiteClass(cast<InvokeInst>(*I)); case Instruction::BitCast: case Instruction::GetElementPtr: case Instruction::Select: diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 44e6637f63377..2cdf7a1772169 100644 --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -36,8 +36,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) LI.analyze(DT); // Then compute BranchProbabilityInfo. - BranchProbabilityInfo BPI; - BPI.calculate(*F, LI); + BranchProbabilityInfo BPI(*F, LI); // Finally compute BFI. OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI); @@ -47,6 +46,10 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) bool OptimizationRemarkEmitter::invalidate( Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv) { + if (OwnedBFI.get()) { + OwnedBFI.reset(); + BFI = nullptr; + } // This analysis has no state and so can be trivially preserved but it needs // a fresh view of BFI if it was constructed with one. if (BFI && Inv.invalidate<BlockFrequencyAnalysis>(F, PA)) diff --git a/llvm/lib/Analysis/OrderedBasicBlock.cpp b/llvm/lib/Analysis/OrderedBasicBlock.cpp deleted file mode 100644 index 48f2a4020c666..0000000000000 --- a/llvm/lib/Analysis/OrderedBasicBlock.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the OrderedBasicBlock class. OrderedBasicBlock -// maintains an interface where clients can query if one instruction comes -// before another in a BasicBlock. Since BasicBlock currently lacks a reliable -// way to query relative position between instructions one can use -// OrderedBasicBlock to do such queries. OrderedBasicBlock is lazily built on a -// source BasicBlock and maintains an internal Instruction -> Position map. A -// OrderedBasicBlock instance should be discarded whenever the source -// BasicBlock changes. -// -// It's currently used by the CaptureTracker in order to find relative -// positions of a pair of instructions inside a BasicBlock. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/OrderedBasicBlock.h" -#include "llvm/IR/Instruction.h" -using namespace llvm; - -OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB) - : NextInstPos(0), BB(BasicB) { - LastInstFound = BB->end(); -} - -/// Given no cached results, find if \p A comes before \p B in \p BB. -/// Cache and number out instruction while walking \p BB. -bool OrderedBasicBlock::comesBefore(const Instruction *A, - const Instruction *B) { - const Instruction *Inst = nullptr; - assert(!(LastInstFound == BB->end() && NextInstPos != 0) && - "Instruction supposed to be in NumberedInsts"); - assert(A->getParent() == BB && "Instruction supposed to be in the block!"); - assert(B->getParent() == BB && "Instruction supposed to be in the block!"); - - // Start the search with the instruction found in the last lookup round. 
- auto II = BB->begin(); - auto IE = BB->end(); - if (LastInstFound != IE) - II = std::next(LastInstFound); - - // Number all instructions up to the point where we find 'A' or 'B'. - for (; II != IE; ++II) { - Inst = cast<Instruction>(II); - NumberedInsts[Inst] = NextInstPos++; - if (Inst == A || Inst == B) - break; - } - - assert(II != IE && "Instruction not found?"); - assert((Inst == A || Inst == B) && "Should find A or B"); - LastInstFound = II; - return Inst != B; -} - -/// Find out whether \p A dominates \p B, meaning whether \p A -/// comes before \p B in \p BB. This is a simplification that considers -/// cached instruction positions and ignores other basic blocks, being -/// only relevant to compare relative instructions positions inside \p BB. -bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) { - assert(A->getParent() == B->getParent() && - "Instructions must be in the same basic block!"); - assert(A->getParent() == BB && "Instructions must be in the tracked block!"); - - // First we lookup the instructions. If they don't exist, lookup will give us - // back ::end(). If they both exist, we compare the numbers. Otherwise, if NA - // exists and NB doesn't, it means NA must come before NB because we would - // have numbered NB as well if it didn't. The same is true for NB. If it - // exists, but NA does not, NA must come after it. If neither exist, we need - // to number the block and cache the results (by calling comesBefore). - auto NAI = NumberedInsts.find(A); - auto NBI = NumberedInsts.find(B); - if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end()) - return NAI->second < NBI->second; - if (NAI != NumberedInsts.end()) - return true; - if (NBI != NumberedInsts.end()) - return false; - - return comesBefore(A, B); -} - -void OrderedBasicBlock::eraseInstruction(const Instruction *I) { - if (LastInstFound != BB->end() && I == &*LastInstFound) { - if (LastInstFound == BB->begin()) { - LastInstFound = BB->end(); - NextInstPos = 0; - } else - LastInstFound--; - } - - NumberedInsts.erase(I); -} - -void OrderedBasicBlock::replaceInstruction(const Instruction *Old, - const Instruction *New) { - auto OI = NumberedInsts.find(Old); - if (OI == NumberedInsts.end()) - return; - - NumberedInsts.insert({New, OI->second}); - if (LastInstFound != BB->end() && Old == &*LastInstFound) - LastInstFound = New->getIterator(); - NumberedInsts.erase(Old); -} diff --git a/llvm/lib/Analysis/OrderedInstructions.cpp b/llvm/lib/Analysis/OrderedInstructions.cpp deleted file mode 100644 index e947e5e388a83..0000000000000 --- a/llvm/lib/Analysis/OrderedInstructions.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===-- OrderedInstructions.cpp - Instruction dominance function ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines utility to check dominance relation of 2 instructions. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/OrderedInstructions.h" -using namespace llvm; - -bool OrderedInstructions::localDominates(const Instruction *InstA, - const Instruction *InstB) const { - assert(InstA->getParent() == InstB->getParent() && - "Instructions must be in the same basic block"); - - const BasicBlock *IBB = InstA->getParent(); - auto OBB = OBBMap.find(IBB); - if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, std::make_unique<OrderedBasicBlock>(IBB)}).first; - return OBB->second->dominates(InstA, InstB); -} - -/// Given 2 instructions, use OrderedBasicBlock to check for dominance relation -/// if the instructions are in the same basic block, Otherwise, use dominator -/// tree. -bool OrderedInstructions::dominates(const Instruction *InstA, - const Instruction *InstB) const { - // Use ordered basic block to do dominance check in case the 2 instructions - // are in the same basic block. - if (InstA->getParent() == InstB->getParent()) - return localDominates(InstA, InstB); - return DT->dominates(InstA->getParent(), InstB->getParent()); -} - -bool OrderedInstructions::dfsBefore(const Instruction *InstA, - const Instruction *InstB) const { - // Use ordered basic block in case the 2 instructions are in the same basic - // block. - if (InstA->getParent() == InstB->getParent()) - return localDominates(InstA, InstB); - - DomTreeNode *DA = DT->getNode(InstA->getParent()); - DomTreeNode *DB = DT->getNode(InstB->getParent()); - return DA->getDFSNumIn() < DB->getDFSNumIn(); -} diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 911d39d9a2637..c9671d4f5c2e1 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -14,11 +14,12 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/InitializePasses.h" +#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -66,48 +67,52 @@ static cl::opt<int> ProfileSummaryColdCount( cl::desc("A fixed cold count that overrides the count derived from" " profile-summary-cutoff-cold")); -// Find the summary entry for a desired percentile of counts. -static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, - uint64_t Percentile) { - auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { - return Entry.Cutoff < Percentile; - }); - // The required percentile has to be <= one of the percentiles in the - // detailed summary. 
- if (It == DS.end()) - report_fatal_error("Desired percentile exceeds the maximum cutoff"); - return *It; -} +static cl::opt<bool> PartialProfile( + "partial-profile", cl::Hidden, cl::init(false), + cl::desc("Specify the current profile is used as a partial profile.")); + +cl::opt<bool> ScalePartialSampleProfileWorkingSetSize( + "scale-partial-sample-profile-working-set-size", cl::Hidden, cl::init(true), + cl::desc( + "If true, scale the working set size of the partial sample profile " + "by the partial profile ratio to reflect the size of the program " + "being compiled.")); + +static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor( + "partial-sample-profile-working-set-size-scale-factor", cl::Hidden, + cl::init(0.008), + cl::desc("The scale factor used to scale the working set size of the " + "partial sample profile along with the partial profile ratio. " + "This includes the factor of the profile counter per block " + "and the factor to scale the working set size to use the same " + "shared thresholds as PGO.")); // The profile summary metadata may be attached either by the frontend or by // any backend passes (IR level instrumentation, for example). This method // checks if the Summary is null and if so checks if the summary metadata is now -// available in the module and parses it to get the Summary object. Returns true -// if a valid Summary is available. -bool ProfileSummaryInfo::computeSummary() { - if (Summary) - return true; +// available in the module and parses it to get the Summary object. +void ProfileSummaryInfo::refresh() { + if (hasProfileSummary()) + return; // First try to get context sensitive ProfileSummary. auto *SummaryMD = M.getProfileSummary(/* IsCS */ true); - if (SummaryMD) { + if (SummaryMD) Summary.reset(ProfileSummary::getFromMD(SummaryMD)); - return true; + + if (!hasProfileSummary()) { + // This will actually return PSK_Instr or PSK_Sample summary. + SummaryMD = M.getProfileSummary(/* IsCS */ false); + if (SummaryMD) + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); } - // This will actually return PSK_Instr or PSK_Sample summary. - SummaryMD = M.getProfileSummary(/* IsCS */ false); - if (!SummaryMD) - return false; - Summary.reset(ProfileSummary::getFromMD(SummaryMD)); - return true; + if (!hasProfileSummary()) + return; + computeThresholds(); } -Optional<uint64_t> -ProfileSummaryInfo::getProfileCount(const Instruction *Inst, - BlockFrequencyInfo *BFI, - bool AllowSynthetic) { - if (!Inst) - return None; - assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && +Optional<uint64_t> ProfileSummaryInfo::getProfileCount( + const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const { + assert((isa<CallInst>(Call) || isa<InvokeInst>(Call)) && "We can only get profile count for call/invoke instruction."); if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the @@ -115,20 +120,20 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, // since the sampled entry count may not be accurate. If there is no // annotated on the instruction, return None. uint64_t TotalCount; - if (Inst->extractProfTotalWeight(TotalCount)) + if (Call.extractProfTotalWeight(TotalCount)) return TotalCount; return None; } if (BFI) - return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic); + return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic); return None; } /// Returns true if the function's entry is hot. 
If it returns false, it /// either means it is not hot or it is unknown whether it is hot or not (for /// example, no profile data is available). -bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const { + if (!F || !hasProfileSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining hotness is based on @@ -142,9 +147,9 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { /// hot total call edge count. /// If it returns false, it either means it is not hot or it is unknown /// (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, - BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionHotInCallGraph( + const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (isHotCount(FunctionCount.getCount())) @@ -155,7 +160,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, for (const auto &BB : *F) for (const auto &I : BB) if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) TotalCallCount += CallCount.getValue(); if (isHotCount(TotalCallCount)) return true; @@ -171,9 +176,9 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, /// the total call edge count is cold. /// If it returns false, it either means it is not cold or it is unknown /// (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, - BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionColdInCallGraph( + const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (!isColdCount(FunctionCount.getCount())) @@ -184,7 +189,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, for (const auto &BB : *F) for (const auto &I : BB) if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) TotalCallCount += CallCount.getValue(); if (!isColdCount(TotalCallCount)) return false; @@ -195,40 +200,67 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } -// Like isFunctionHotInCallGraph but for a given cutoff. 
-bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( - int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const { + assert(hasPartialSampleProfile() && "Expect partial sample profile"); + return !F.getEntryCount().hasValue(); +} + +template <bool isHot> +bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; - if (auto FunctionCount = F->getEntryCount()) - if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + if (auto FunctionCount = F->getEntryCount()) { + if (isHot && + isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) return true; - + if (!isHot && + !isColdCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return false; + } if (hasSampleProfile()) { uint64_t TotalCallCount = 0; for (const auto &BB : *F) for (const auto &I : BB) if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) TotalCallCount += CallCount.getValue(); - if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) return true; + if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount)) + return false; } - for (const auto &BB : *F) - if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + for (const auto &BB : *F) { + if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) return true; - return false; + if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return false; + } + return !isHot; +} + +// Like isFunctionHotInCallGraph but for a given cutoff. +bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { + return isFunctionHotOrColdInCallGraphNthPercentile<true>( + PercentileCutoff, F, BFI); +} + +bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { + return isFunctionHotOrColdInCallGraphNthPercentile<false>( + PercentileCutoff, F, BFI); } /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). -bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { +bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const { if (!F) return false; if (F->hasFnAttribute(Attribute::Cold)) return true; - if (!computeSummary()) + if (!hasProfileSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining coldness is based on @@ -239,116 +271,151 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { /// Compute the hot and cold thresholds. 
void ProfileSummaryInfo::computeThresholds() { - if (!computeSummary()) - return; auto &DetailedSummary = Summary->getDetailedSummary(); - auto &HotEntry = - getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot); + auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile( + DetailedSummary, ProfileSummaryCutoffHot); HotCountThreshold = HotEntry.MinCount; if (ProfileSummaryHotCount.getNumOccurrences() > 0) HotCountThreshold = ProfileSummaryHotCount; - auto &ColdEntry = - getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold); + auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( + DetailedSummary, ProfileSummaryCutoffCold); ColdCountThreshold = ColdEntry.MinCount; if (ProfileSummaryColdCount.getNumOccurrences() > 0) ColdCountThreshold = ProfileSummaryColdCount; assert(ColdCountThreshold <= HotCountThreshold && "Cold count threshold cannot exceed hot count threshold!"); - HasHugeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; - HasLargeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { + HasHugeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } else { + // Scale the working set size of the partial sample profile to reflect the + // size of the program being compiled. + double PartialProfileRatio = Summary->getPartialProfileRatio(); + uint64_t ScaledHotEntryNumCounts = + static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio * + PartialSampleProfileWorkingSetSizeScaleFactor); + HasHugeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } } -Optional<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { - if (!computeSummary()) +Optional<uint64_t> +ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { + if (!hasProfileSummary()) return None; auto iter = ThresholdCache.find(PercentileCutoff); if (iter != ThresholdCache.end()) { return iter->second; } auto &DetailedSummary = Summary->getDetailedSummary(); - auto &Entry = - getEntryForPercentile(DetailedSummary, PercentileCutoff); + auto &Entry = ProfileSummaryBuilder::getEntryForPercentile(DetailedSummary, + PercentileCutoff); uint64_t CountThreshold = Entry.MinCount; ThresholdCache[PercentileCutoff] = CountThreshold; return CountThreshold; } -bool ProfileSummaryInfo::hasHugeWorkingSetSize() { - if (!HasHugeWorkingSetSize) - computeThresholds(); +bool ProfileSummaryInfo::hasHugeWorkingSetSize() const { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } -bool ProfileSummaryInfo::hasLargeWorkingSetSize() { - if (!HasLargeWorkingSetSize) - computeThresholds(); +bool ProfileSummaryInfo::hasLargeWorkingSetSize() const { return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); } -bool ProfileSummaryInfo::isHotCount(uint64_t C) { - if (!HotCountThreshold) - computeThresholds(); +bool ProfileSummaryInfo::isHotCount(uint64_t C) const { return HotCountThreshold && C >= HotCountThreshold.getValue(); } -bool ProfileSummaryInfo::isColdCount(uint64_t C) { - if (!ColdCountThreshold) - computeThresholds(); +bool ProfileSummaryInfo::isColdCount(uint64_t C) const { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } -bool 
ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { +template <bool isHot> +bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, + uint64_t C) const { auto CountThreshold = computeThreshold(PercentileCutoff); - return CountThreshold && C >= CountThreshold.getValue(); + if (isHot) + return CountThreshold && C >= CountThreshold.getValue(); + else + return CountThreshold && C <= CountThreshold.getValue(); } -uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { - if (!HotCountThreshold) - computeThresholds(); +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, + uint64_t C) const { + return isHotOrColdCountNthPercentile<true>(PercentileCutoff, C); +} + +bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, + uint64_t C) const { + return isHotOrColdCountNthPercentile<false>(PercentileCutoff, C); +} + +uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const { return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX; } -uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() { - if (!ColdCountThreshold) - computeThresholds(); +uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const { return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; } -bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) { +bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, + BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); return Count && isHotCount(*Count); } bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); return Count && isColdCount(*Count); } -bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, - BlockFrequencyInfo *BFI) { +template <bool isHot> +bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); - return Count && isHotCountNthPercentile(PercentileCutoff, *Count); + if (isHot) + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); + else + return Count && isColdCountNthPercentile(PercentileCutoff, *Count); +} + +bool ProfileSummaryInfo::isHotBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { + return isHotOrColdBlockNthPercentile<true>(PercentileCutoff, BB, BFI); +} + +bool ProfileSummaryInfo::isColdBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { + return isHotOrColdBlockNthPercentile<false>(PercentileCutoff, BB, BFI); } -bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, - BlockFrequencyInfo *BFI) { - auto C = getProfileCount(CS.getInstruction(), BFI); +bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB, + BlockFrequencyInfo *BFI) const { + auto C = getProfileCount(CB, BFI); return C && isHotCount(*C); } -bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, - BlockFrequencyInfo *BFI) { - auto C = getProfileCount(CS.getInstruction(), BFI); +bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, + BlockFrequencyInfo *BFI) const { + auto C = getProfileCount(CB, BFI); if (C) return isColdCount(*C); // In SamplePGO, if the caller has been sampled, and there is no profile // annotated on the callsite, we consider the callsite as cold. 
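// A similar sketch at call-site granularity, using the CallBase overloads
// above, which replace the old CallSite-based ones. The helper name is
// illustrative; BFI may be null when only sample counts are available.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static bool boostInlineThreshold(const CallBase &CB, ProfileSummaryInfo &PSI,
                                 BlockFrequencyInfo *BFI) {
  // Never boost a provably cold site; boost only when the site clears the
  // hot-count threshold derived from the profile summary.
  if (PSI.isColdCallSite(CB, BFI))
    return false;
  return PSI.isHotCallSite(CB, BFI);
}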
- return hasSampleProfile() && CS.getCaller()->hasProfileData(); + return hasSampleProfile() && CB.getCaller()->hasProfileData(); +} + +bool ProfileSummaryInfo::hasPartialSampleProfile() const { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Sample && + (PartialProfile || Summary->isPartialProfile()); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index 020ff85d1b989..1fb5faaa6a71f 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -47,11 +47,11 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { BasicBlock *BB = Node->getNodeAs<BasicBlock>(); if (isSimple()) - return DOTGraphTraits<const Function*> - ::getSimpleNodeLabel(BB, BB->getParent()); + return DOTGraphTraits<DOTFuncInfo *> + ::getSimpleNodeLabel(BB, nullptr); else - return DOTGraphTraits<const Function*> - ::getCompleteNodeLabel(BB, BB->getParent()); + return DOTGraphTraits<DOTFuncInfo *> + ::getCompleteNodeLabel(BB, nullptr); } return "Not implemented"; diff --git a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp new file mode 100644 index 0000000000000..4c0ffbc17ff7a --- /dev/null +++ b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp @@ -0,0 +1,87 @@ +//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a model runner wrapping an AOT compiled ML model. +// Only inference is supported. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineModelFeatureMaps.h" +#include "llvm/Analysis/MLInlineAdvisor.h" + +// codegen-ed file +#include "InlinerSizeModel.h" // NOLINT + +#include <memory> +#include <vector> + +using namespace llvm; +namespace { + +static const char *const FeedPrefix = "feed_"; +static const char *const FetchPrefix = "fetch_"; + +/// MLModelRunner - production mode implementation. It uses a AOT-compiled +/// SavedModel for efficient execution. 
+class ReleaseModeModelRunner final : public MLModelRunner { +public: + ReleaseModeModelRunner(LLVMContext &Ctx); + virtual ~ReleaseModeModelRunner() = default; + + bool run() override; + + void setFeature(FeatureIndex Index, int64_t Value) override; + int64_t getFeature(int Index) const override; + +private: + std::vector<int32_t> FeatureIndices; + int32_t ResultIndex = -1; + std::unique_ptr<llvm::InlinerSizeModel> CompiledModel; +}; +} // namespace + +ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx) + : MLModelRunner(Ctx), + CompiledModel(std::make_unique<llvm::InlinerSizeModel>()) { + assert(CompiledModel && "The CompiledModel should be valid"); + + FeatureIndices.reserve(NumberOfFeatures); + + for (size_t I = 0; I < NumberOfFeatures; ++I) { + const int Index = + CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); + assert(Index >= 0 && "Cannot find Feature in inlining model"); + FeatureIndices[I] = Index; + } + + ResultIndex = + CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); + assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model"); +} + +int64_t ReleaseModeModelRunner::getFeature(int Index) const { + return *static_cast<int64_t *>( + CompiledModel->arg_data(FeatureIndices[Index])); +} + +void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) { + *static_cast<int64_t *>(CompiledModel->arg_data( + FeatureIndices[static_cast<size_t>(Index)])) = Value; +} + +bool ReleaseModeModelRunner::run() { + CompiledModel->Run(); + return static_cast<bool>( + *static_cast<int64_t *>(CompiledModel->result_data(ResultIndex))); +} + +std::unique_ptr<InlineAdvisor> +llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { + auto AOTRunner = std::make_unique<ReleaseModeModelRunner>(M.getContext()); + return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner)); +} diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 26a9a5ddf1ea7..48c686b732608 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -79,6 +79,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionDivision.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -86,7 +87,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -848,273 +848,14 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, } } -// Returns the size of the SCEV S. -static inline int sizeOfSCEV(const SCEV *S) { - struct FindSCEVSize { - int Size = 0; - - FindSCEVSize() = default; - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - - bool isDone() const { - return false; - } - }; - - FindSCEVSize F; - SCEVTraversal<FindSCEVSize> ST(F); - ST.visitAll(S); - return F.Size; -} - -/// Returns true if the subtree of \p S contains at least HugeExprThreshold -/// nodes. -static bool isHugeExpression(const SCEV *S) { - return S->getExpressionSize() >= HugeExprThreshold; -} - -/// Returns true of \p Ops contains a huge SCEV (see definition above). 
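// Sketch of the guard used by hasHugeExpression() just below, which replaces
// the traversal-based sizeOfSCEV()/isHugeExpression() helpers removed above:
// getExpressionSize() is a precomputed size kept on each SCEV node, so the
// check is linear in the number of operands rather than in the size of each
// subtree. Threshold stands in for the HugeExprThreshold option.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static bool anyOperandTooLarge(ArrayRef<const SCEV *> Ops, unsigned Threshold) {
  return any_of(Ops, [Threshold](const SCEV *S) {
    return S->getExpressionSize() >= Threshold;
  });
}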
+/// Returns true if \p Ops contains a huge SCEV (the subtree of S contains at +/// least HugeExprThreshold nodes). static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) { - return any_of(Ops, isHugeExpression); + return any_of(Ops, [](const SCEV *S) { + return S->getExpressionSize() >= HugeExprThreshold; + }); } -namespace { - -struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { -public: - // Computes the Quotient and Remainder of the division of Numerator by - // Denominator. - static void divide(ScalarEvolution &SE, const SCEV *Numerator, - const SCEV *Denominator, const SCEV **Quotient, - const SCEV **Remainder) { - assert(Numerator && Denominator && "Uninitialized SCEV"); - - SCEVDivision D(SE, Numerator, Denominator); - - // Check for the trivial case here to avoid having to check for it in the - // rest of the code. - if (Numerator == Denominator) { - *Quotient = D.One; - *Remainder = D.Zero; - return; - } - - if (Numerator->isZero()) { - *Quotient = D.Zero; - *Remainder = D.Zero; - return; - } - - // A simple case when N/1. The quotient is N. - if (Denominator->isOne()) { - *Quotient = Numerator; - *Remainder = D.Zero; - return; - } - - // Split the Denominator when it is a product. - if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) { - const SCEV *Q, *R; - *Quotient = Numerator; - for (const SCEV *Op : T->operands()) { - divide(SE, *Quotient, Op, &Q, &R); - *Quotient = Q; - - // Bail out when the Numerator is not divisible by one of the terms of - // the Denominator. - if (!R->isZero()) { - *Quotient = D.Zero; - *Remainder = Numerator; - return; - } - } - *Remainder = D.Zero; - return; - } - - D.visit(Numerator); - *Quotient = D.Quotient; - *Remainder = D.Remainder; - } - - // Except in the trivial case described above, we do not know how to divide - // Expr by Denominator for the following functions with empty implementation. 
- void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} - void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} - void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} - void visitUDivExpr(const SCEVUDivExpr *Numerator) {} - void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} - void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} - void visitSMinExpr(const SCEVSMinExpr *Numerator) {} - void visitUMinExpr(const SCEVUMinExpr *Numerator) {} - void visitUnknown(const SCEVUnknown *Numerator) {} - void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} - - void visitConstant(const SCEVConstant *Numerator) { - if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { - APInt NumeratorVal = Numerator->getAPInt(); - APInt DenominatorVal = D->getAPInt(); - uint32_t NumeratorBW = NumeratorVal.getBitWidth(); - uint32_t DenominatorBW = DenominatorVal.getBitWidth(); - - if (NumeratorBW > DenominatorBW) - DenominatorVal = DenominatorVal.sext(NumeratorBW); - else if (NumeratorBW < DenominatorBW) - NumeratorVal = NumeratorVal.sext(DenominatorBW); - - APInt QuotientVal(NumeratorVal.getBitWidth(), 0); - APInt RemainderVal(NumeratorVal.getBitWidth(), 0); - APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); - Quotient = SE.getConstant(QuotientVal); - Remainder = SE.getConstant(RemainderVal); - return; - } - } - - void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { - const SCEV *StartQ, *StartR, *StepQ, *StepR; - if (!Numerator->isAffine()) - return cannotDivide(Numerator); - divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); - divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); - // Bail out if the types do not match. - Type *Ty = Denominator->getType(); - if (Ty != StartQ->getType() || Ty != StartR->getType() || - Ty != StepQ->getType() || Ty != StepR->getType()) - return cannotDivide(Numerator); - Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), - Numerator->getNoWrapFlags()); - Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), - Numerator->getNoWrapFlags()); - } - - void visitAddExpr(const SCEVAddExpr *Numerator) { - SmallVector<const SCEV *, 2> Qs, Rs; - Type *Ty = Denominator->getType(); - - for (const SCEV *Op : Numerator->operands()) { - const SCEV *Q, *R; - divide(SE, Op, Denominator, &Q, &R); - - // Bail out if types do not match. - if (Ty != Q->getType() || Ty != R->getType()) - return cannotDivide(Numerator); - - Qs.push_back(Q); - Rs.push_back(R); - } - - if (Qs.size() == 1) { - Quotient = Qs[0]; - Remainder = Rs[0]; - return; - } - - Quotient = SE.getAddExpr(Qs); - Remainder = SE.getAddExpr(Rs); - } - - void visitMulExpr(const SCEVMulExpr *Numerator) { - SmallVector<const SCEV *, 2> Qs; - Type *Ty = Denominator->getType(); - - bool FoundDenominatorTerm = false; - for (const SCEV *Op : Numerator->operands()) { - // Bail out if types do not match. - if (Ty != Op->getType()) - return cannotDivide(Numerator); - - if (FoundDenominatorTerm) { - Qs.push_back(Op); - continue; - } - - // Check whether Denominator divides one of the product operands. - const SCEV *Q, *R; - divide(SE, Op, Denominator, &Q, &R); - if (!R->isZero()) { - Qs.push_back(Op); - continue; - } - - // Bail out if types do not match. 
- if (Ty != Q->getType()) - return cannotDivide(Numerator); - - FoundDenominatorTerm = true; - Qs.push_back(Q); - } - - if (FoundDenominatorTerm) { - Remainder = Zero; - if (Qs.size() == 1) - Quotient = Qs[0]; - else - Quotient = SE.getMulExpr(Qs); - return; - } - - if (!isa<SCEVUnknown>(Denominator)) - return cannotDivide(Numerator); - - // The Remainder is obtained by replacing Denominator by 0 in Numerator. - ValueToValueMap RewriteMap; - RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = - cast<SCEVConstant>(Zero)->getValue(); - Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); - - if (Remainder->isZero()) { - // The Quotient is obtained by replacing Denominator by 1 in Numerator. - RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = - cast<SCEVConstant>(One)->getValue(); - Quotient = - SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); - return; - } - - // Quotient is (Numerator - Remainder) divided by Denominator. - const SCEV *Q, *R; - const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); - // This SCEV does not seem to simplify: fail the division here. - if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) - return cannotDivide(Numerator); - divide(SE, Diff, Denominator, &Q, &R); - if (R != Zero) - return cannotDivide(Numerator); - Quotient = Q; - } - -private: - SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, - const SCEV *Denominator) - : SE(S), Denominator(Denominator) { - Zero = SE.getZero(Denominator->getType()); - One = SE.getOne(Denominator->getType()); - - // We generally do not know how to divide Expr by Denominator. We - // initialize the division to a "cannot divide" state to simplify the rest - // of the code. - cannotDivide(Numerator); - } - - // Convenience function for giving up on the division. We set the quotient to - // be equal to zero and the remainder to be equal to the numerator. - void cannotDivide(const SCEV *Numerator) { - Quotient = Zero; - Remainder = Numerator; - } - - ScalarEvolution &SE; - const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; -}; - -} // end anonymous namespace - //===----------------------------------------------------------------------===// // Simple SCEV method implementations //===----------------------------------------------------------------------===// @@ -1612,7 +1353,7 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, const SCEVConstant *ConstantTerm, const SCEVAddExpr *WholeAddExpr) { - const APInt C = ConstantTerm->getAPInt(); + const APInt &C = ConstantTerm->getAPInt(); const unsigned BitWidth = C.getBitWidth(); // Find number of trailing zeros of (x + y + ...) w/o the C first: uint32_t TZ = BitWidth; @@ -2455,6 +2196,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateAddExpr(Ops, Flags); + if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) { + static_cast<SCEVAddExpr *>(S)->setNoWrapFlags(Flags); + return S; + } + // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. @@ -2930,10 +2676,17 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); - // Limit recursion calls depth. 
- if (Depth > MaxArithDepth || hasHugeExpression(Ops)) + // Limit recursion calls depth, but fold all-constant expressions. + // `Ops` is sorted, so it's enough to check just last one. + if ((Depth > MaxArithDepth || hasHugeExpression(Ops)) && + !isa<SCEVConstant>(Ops.back())) return getOrCreateMulExpr(Ops, Flags); + if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) { + static_cast<SCEVMulExpr *>(S)->setNoWrapFlags(Flags); + return S; + } + // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { @@ -3104,8 +2857,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Limit max number of arguments to avoid creation of unreasonably big // SCEVAddRecs with very complex operands. if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > - MaxAddRecSize || isHugeExpression(AddRec) || - isHugeExpression(OtherAddRec)) + MaxAddRecSize || hasHugeExpression({AddRec, OtherAddRec})) continue; bool Overflow = false; @@ -3197,6 +2949,14 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getEffectiveSCEVType(RHS->getType()) && "SCEVUDivExpr operand types don't match!"); + FoldingSetNodeID ID; + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x @@ -3243,9 +3003,24 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, AR->getLoop(), SCEV::FlagAnyWrap)) { const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); - if (StartRem != 0) - LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, - AR->getLoop(), SCEV::FlagNW); + if (StartRem != 0) { + const SCEV *NewLHS = + getAddRecExpr(getConstant(StartInt - StartRem), Step, + AR->getLoop(), SCEV::FlagNW); + if (LHS != NewLHS) { + LHS = NewLHS; + + // Reset the ID to include the new LHS, and check if it is + // already cached. + ID.clear(); + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; + } + } } } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. @@ -3310,11 +3085,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } - FoldingSetNodeID ID; - ID.AddInteger(scUDivExpr); - ID.AddPointer(LHS); - ID.AddPointer(RHS); - void *IP = nullptr; + // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs + // changes). Make sure we get a new one. + IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); @@ -3505,9 +3278,8 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, : SCEV::FlagAnyWrap; const SCEV *TotalOffset = getZero(IntIdxTy); - // The array size is unimportant. The first thing we do on CurTy is getting - // its element type. - Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); + Type *CurTy = GEP->getType(); + bool FirstIter = true; for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast<StructType>(CurTy)) { @@ -3523,7 +3295,14 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, CurTy = STy->getTypeAtIndex(Index); } else { // Update CurTy to its element type. 
- CurTy = cast<SequentialType>(CurTy)->getElementType(); + if (FirstIter) { + assert(isa<PointerType>(CurTy) && + "The first index of a GEP indexes a pointer"); + CurTy = GEP->getSourceElementType(); + FirstIter = false; + } else { + CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0); + } // For an array, add the element offset, explicitly scaled. const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy); // Getelementptr indices are signed. @@ -3538,10 +3317,13 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, } // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseExpr, TotalOffset, Wrap); + auto *GEPExpr = getAddExpr(BaseExpr, TotalOffset, Wrap); + assert(BaseExpr->getType() == GEPExpr->getType() && + "GEP should not change type mid-flight."); + return GEPExpr; } -std::tuple<const SCEV *, FoldingSetNodeID, void *> +std::tuple<SCEV *, FoldingSetNodeID, void *> ScalarEvolution::findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops) { FoldingSetNodeID ID; @@ -3549,7 +3331,7 @@ ScalarEvolution::findExistingSCEVInCache(int SCEVType, ID.AddInteger(SCEVType); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - return std::tuple<const SCEV *, FoldingSetNodeID, void *>( + return std::tuple<SCEV *, FoldingSetNodeID, void *>( UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); } @@ -3727,6 +3509,12 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. + if (isa<ScalableVectorType>(AllocTy)) { + Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo()); + Constant *One = ConstantInt::get(IntTy, 1); + Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One); + return getSCEV(ConstantExpr::getPtrToInt(GEP, IntTy)); + } return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } @@ -3820,7 +3608,8 @@ bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { if (I != HasRecMap.end()) return I->second; - bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>); + bool FoundAddRec = + SCEVExprContains(S, [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); }); HasRecMap.insert({S, FoundAddRec}); return FoundAddRec; } @@ -4167,23 +3956,25 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { if (!V->getType()->isPointerTy()) return V; - if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { - return getPointerBase(Cast->getOperand()); - } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { - const SCEV *PtrOp = nullptr; - for (const SCEV *NAryOp : NAry->operands()) { - if (NAryOp->getType()->isPointerTy()) { - // Cannot find the base of an expression with multiple pointer operands. - if (PtrOp) - return V; - PtrOp = NAryOp; + while (true) { + if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { + V = Cast->getOperand(); + } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { + const SCEV *PtrOp = nullptr; + for (const SCEV *NAryOp : NAry->operands()) { + if (NAryOp->getType()->isPointerTy()) { + // Cannot find the base of an expression with multiple pointer ops. + if (PtrOp) + return V; + PtrOp = NAryOp; + } } - } - if (!PtrOp) + if (!PtrOp) // All operands were non-pointer. + return V; + V = PtrOp; + } else // Not something we can look further into. 
return V; - return getPointerBase(PtrOp); } - return V; } /// Push users of the given Instruction onto the given Worklist. @@ -5740,7 +5531,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // For a SCEVUnknown, ask ValueTracking. KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (Known.getBitWidth() != BitWidth) - Known = Known.zextOrTrunc(BitWidth, true); + Known = Known.zextOrTrunc(BitWidth); // If Known does not result in full-set, intersect with it. if (Known.getMinValue() != Known.getMaxValue() + 1) ConservativeResult = ConservativeResult.intersectWith( @@ -6032,7 +5823,7 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { return false; // Only proceed if we can prove that I does not yield poison. - if (!programUndefinedIfFullPoison(I)) + if (!programUndefinedIfPoison(I)) return false; // At this point we know that if I is executed, then it does not wrap @@ -6112,7 +5903,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { SmallVector<const Instruction *, 8> PoisonStack; // We start by assuming \c I, the post-inc add recurrence, is poison. Only - // things that are known to be fully poison under that assumption go on the + // things that are known to be poison under that assumption go on the // PoisonStack. Pushed.insert(I); PoisonStack.push_back(I); @@ -6122,7 +5913,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { const Instruction *Poison = PoisonStack.pop_back_val(); for (auto *PoisonUser : Poison->users()) { - if (propagatesFullPoison(cast<Instruction>(PoisonUser))) { + if (propagatesPoison(cast<Instruction>(PoisonUser))) { if (Pushed.insert(cast<Instruction>(PoisonUser)).second) PoisonStack.push_back(cast<Instruction>(PoisonUser)); } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) { @@ -6349,15 +6140,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { // Build a plain add SCEV. - const SCEV *S = getAddExpr(LHS, getSCEV(CI)); - // If the LHS of the add was an addrec and it has no-wrap flags, - // transfer the no-wrap flags, since an or won't introduce a wrap. - if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); - const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( - OldAR->getNoWrapFlags()); - } - return S; + return getAddExpr(LHS, getSCEV(CI), + (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW)); } } break; @@ -6413,15 +6197,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (SA->getValue().uge(BitWidth)) break; - // It is currently not resolved how to interpret NSW for left - // shift by BitWidth - 1, so we avoid applying flags in that - // case. Remove this check (or this comment) once the situation - // is resolved. See - // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html - // and http://reviews.llvm.org/D8890 . + // We can safely preserve the nuw flag in all cases. It's also safe to + // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation + // requires special handling. It can be preserved as long as we're not + // left shifting by bitwidth - 1. 
auto Flags = SCEV::FlagAnyWrap; - if (BO->Op && SA->getValue().ult(BitWidth - 1)) - Flags = getNoWrapFlagsFromUB(BO->Op); + if (BO->Op) { + auto MulFlags = getNoWrapFlagsFromUB(BO->Op); + if ((MulFlags & SCEV::FlagNSW) && + ((MulFlags & SCEV::FlagNUW) || SA->getValue().ult(BitWidth - 1))) + Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNSW); + if (MulFlags & SCEV::FlagNUW) + Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW); + } Constant *X = ConstantInt::get( getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); @@ -6515,6 +6303,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; + case Instruction::SDiv: + // If both operands are non-negative, this is just an udiv. + if (isKnownNonNegative(getSCEV(U->getOperand(0))) && + isKnownNonNegative(getSCEV(U->getOperand(1)))) + return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); + break; + + case Instruction::SRem: + // If both operands are non-negative, this is just an urem. + if (isKnownNonNegative(getSCEV(U->getOperand(0))) && + isKnownNonNegative(getSCEV(U->getOperand(1)))) + return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); + break; + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can // lead to pointer expressions which cannot safely be expanded to GEPs, // because ScalarEvolution doesn't respect the GEP aliasing rules when @@ -6538,7 +6340,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Call: case Instruction::Invoke: - if (Value *RV = CallSite(U).getReturnedArgOperand()) + if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) return getSCEV(RV); break; } @@ -6644,7 +6446,7 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L, BasicBlock *ExitingBlock, ExitCountKind Kind) { switch (Kind) { - case Exact: + case Exact: return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getMax(ExitingBlock, this); @@ -6661,7 +6463,7 @@ ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, ExitCountKind Kind) { switch (Kind) { - case Exact: + case Exact: return getBackedgeTakenInfo(L).getExact(L, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getMax(this); @@ -6924,6 +6726,10 @@ void ScalarEvolution::forgetValue(Value *V) { } } +void ScalarEvolution::forgetLoopDispositions(const Loop *L) { + LoopDispositions.clear(); +} + /// Get the exact loop backedge taken count considering all loop exits. A /// computable result can only be returned for loops with all exiting blocks /// dominating the latch. howFarToZero assumes that the limit of each loop test @@ -8244,10 +8050,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && isKnownPositive(BackedgeTakenCount) && PN->getNumIncomingValues() == 2) { + unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1; - const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred)); - if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent())) - return OnBackedge; + Value *BackedgeVal = PN->getIncomingValue(InLoopPred); + if (LI->isLoopInvariant(BackedgeVal)) + return getSCEV(BackedgeVal); } if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. 
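// A usage sketch for the getIndexExpressionsFromGEP() helper added a little
// further below. For a GEP such as
//   getelementptr [8 x [16 x i32]], [8 x [16 x i32]]* %A, i64 0, i64 %i, i64 %j
// it collects Subscripts = {%i, %j} and Sizes = {16}: the leading zero index
// is folded away and the outermost dimension needs no size. The wrapper name
// and the bounds check are illustrative only.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool innerSubscriptsInBounds(ScalarEvolution &SE,
                                    const GetElementPtrInst *GEP) {
  SmallVector<const SCEV *, 4> Subscripts;
  SmallVector<int, 4> Sizes;
  if (!SE.getIndexExpressionsFromGEP(GEP, Subscripts, Sizes))
    return false;
  // Subscripts[I] indexes a dimension of Sizes[I-1] elements; the outermost
  // subscript has no recorded size, so start at 1.
  for (unsigned I = 1, E = Subscripts.size(); I != E; ++I) {
    const SCEV *Dim = SE.getConstant(Subscripts[I]->getType(), Sizes[I - 1]);
    if (!SE.isKnownPredicate(ICmpInst::ICMP_ULT, Subscripts[I], Dim))
      return false;
  }
  return true;
}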
If @@ -9226,9 +9033,11 @@ bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, !isAvailableAtLoopEntry(SplitRHS.first, MDL)) return false; - return isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first) && - isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, - SplitRHS.second); + // It seems backedge guard check is faster than entry one so in some cases + // it can speed up whole estimation by short circuit + return isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, + SplitRHS.second) && + isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first); } bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, @@ -11161,8 +10970,9 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { for (const SCEV *T : Terms) - if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>)) + if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); })) return true; + return false; } @@ -11411,6 +11221,51 @@ void ScalarEvolution::delinearize(const SCEV *Expr, }); } +bool ScalarEvolution::getIndexExpressionsFromGEP( + const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<int> &Sizes) { + assert(Subscripts.empty() && Sizes.empty() && + "Expected output lists to be empty on entry to this function."); + assert(GEP && "getIndexExpressionsFromGEP called with a null GEP"); + Type *Ty = GEP->getPointerOperandType(); + bool DroppedFirstDim = false; + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + const SCEV *Expr = getSCEV(GEP->getOperand(i)); + if (i == 1) { + if (auto *PtrTy = dyn_cast<PointerType>(Ty)) { + Ty = PtrTy->getElementType(); + } else if (auto *ArrayTy = dyn_cast<ArrayType>(Ty)) { + Ty = ArrayTy->getElementType(); + } else { + Subscripts.clear(); + Sizes.clear(); + return false; + } + if (auto *Const = dyn_cast<SCEVConstant>(Expr)) + if (Const->getValue()->isZero()) { + DroppedFirstDim = true; + continue; + } + Subscripts.push_back(Expr); + continue; + } + + auto *ArrayTy = dyn_cast<ArrayType>(Ty); + if (!ArrayTy) { + Subscripts.clear(); + Sizes.clear(); + return false; + } + + Subscripts.push_back(Expr); + if (!(DroppedFirstDim && i == 2)) + Sizes.push_back(ArrayTy->getNumElements()); + + Ty = ArrayTy->getElementType(); + } + return !Subscripts.empty(); +} + //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp new file mode 100644 index 0000000000000..19bf5766f4480 --- /dev/null +++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp @@ -0,0 +1,259 @@ +//===- ScalarEvolutionDivision.h - See below --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the class that knows how to divide SCEV's. 
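// A short usage sketch for the divider this file defines; the signature of
// divide() is the one declared in ScalarEvolutionDivision.h, the helper name
// is illustrative. For example, dividing the add-recurrence {0,+,4}<%loop>
// by 4 yields the quotient {0,+,1}<%loop> with a zero remainder, while any
// division the visitor cannot handle comes back as quotient 0 and remainder
// equal to the numerator.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionDivision.h"

using namespace llvm;

static bool dividesEvenly(ScalarEvolution &SE, const SCEV *Numerator,
                          const SCEV *Denominator) {
  const SCEV *Quotient, *Remainder;
  SCEVDivision::divide(SE, Numerator, Denominator, &Quotient, &Remainder);
  return Remainder->isZero();
}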
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolutionDivision.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> + +namespace llvm { +class Type; +} + +using namespace llvm; + +namespace { + +static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size = 0; + + FindSCEVSize() = default; + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + + bool isDone() const { return false; } + }; + + FindSCEVSize F; + SCEVTraversal<FindSCEVSize> ST(F); + ST.visitAll(S); + return F.Size; +} + +} // namespace + +// Computes the Quotient and Remainder of the division of Numerator by +// Denominator. +void SCEVDivision::divide(ScalarEvolution &SE, const SCEV *Numerator, + const SCEV *Denominator, const SCEV **Quotient, + const SCEV **Remainder) { + assert(Numerator && Denominator && "Uninitialized SCEV"); + + SCEVDivision D(SE, Numerator, Denominator); + + // Check for the trivial case here to avoid having to check for it in the + // rest of the code. + if (Numerator == Denominator) { + *Quotient = D.One; + *Remainder = D.Zero; + return; + } + + if (Numerator->isZero()) { + *Quotient = D.Zero; + *Remainder = D.Zero; + return; + } + + // A simple case when N/1. The quotient is N. + if (Denominator->isOne()) { + *Quotient = Numerator; + *Remainder = D.Zero; + return; + } + + // Split the Denominator when it is a product. + if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) { + const SCEV *Q, *R; + *Quotient = Numerator; + for (const SCEV *Op : T->operands()) { + divide(SE, *Quotient, Op, &Q, &R); + *Quotient = Q; + + // Bail out when the Numerator is not divisible by one of the terms of + // the Denominator. + if (!R->isZero()) { + *Quotient = D.Zero; + *Remainder = Numerator; + return; + } + } + *Remainder = D.Zero; + return; + } + + D.visit(Numerator); + *Quotient = D.Quotient; + *Remainder = D.Remainder; +} + +void SCEVDivision::visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { + APInt NumeratorVal = Numerator->getAPInt(); + APInt DenominatorVal = D->getAPInt(); + uint32_t NumeratorBW = NumeratorVal.getBitWidth(); + uint32_t DenominatorBW = DenominatorVal.getBitWidth(); + + if (NumeratorBW > DenominatorBW) + DenominatorVal = DenominatorVal.sext(NumeratorBW); + else if (NumeratorBW < DenominatorBW) + NumeratorVal = NumeratorVal.sext(DenominatorBW); + + APInt QuotientVal(NumeratorVal.getBitWidth(), 0); + APInt RemainderVal(NumeratorVal.getBitWidth(), 0); + APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); + Quotient = SE.getConstant(QuotientVal); + Remainder = SE.getConstant(RemainderVal); + return; + } +} + +void SCEVDivision::visitAddRecExpr(const SCEVAddRecExpr *Numerator) { + const SCEV *StartQ, *StartR, *StepQ, *StepR; + if (!Numerator->isAffine()) + return cannotDivide(Numerator); + divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); + divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + // Bail out if the types do not match. 
+ Type *Ty = Denominator->getType(); + if (Ty != StartQ->getType() || Ty != StartR->getType() || + Ty != StepQ->getType() || Ty != StepR->getType()) + return cannotDivide(Numerator); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), + Numerator->getNoWrapFlags()); +} + +void SCEVDivision::visitAddExpr(const SCEVAddExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs, Rs; + Type *Ty = Denominator->getType(); + + for (const SCEV *Op : Numerator->operands()) { + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + + // Bail out if types do not match. + if (Ty != Q->getType() || Ty != R->getType()) + return cannotDivide(Numerator); + + Qs.push_back(Q); + Rs.push_back(R); + } + + if (Qs.size() == 1) { + Quotient = Qs[0]; + Remainder = Rs[0]; + return; + } + + Quotient = SE.getAddExpr(Qs); + Remainder = SE.getAddExpr(Rs); +} + +void SCEVDivision::visitMulExpr(const SCEVMulExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs; + Type *Ty = Denominator->getType(); + + bool FoundDenominatorTerm = false; + for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) + return cannotDivide(Numerator); + + if (FoundDenominatorTerm) { + Qs.push_back(Op); + continue; + } + + // Check whether Denominator divides one of the product operands. + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + if (!R->isZero()) { + Qs.push_back(Op); + continue; + } + + // Bail out if types do not match. + if (Ty != Q->getType()) + return cannotDivide(Numerator); + + FoundDenominatorTerm = true; + Qs.push_back(Q); + } + + if (FoundDenominatorTerm) { + Remainder = Zero; + if (Qs.size() == 1) + Quotient = Qs[0]; + else + Quotient = SE.getMulExpr(Qs); + return; + } + + if (!isa<SCEVUnknown>(Denominator)) + return cannotDivide(Numerator); + + // The Remainder is obtained by replacing Denominator by 0 in Numerator. + ValueToValueMap RewriteMap; + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = + cast<SCEVConstant>(Zero)->getValue(); + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + + if (Remainder->isZero()) { + // The Quotient is obtained by replacing Denominator by 1 in Numerator. + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = + cast<SCEVConstant>(One)->getValue(); + Quotient = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + return; + } + + // Quotient is (Numerator - Remainder) divided by Denominator. + const SCEV *Q, *R; + const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); + // This SCEV does not seem to simplify: fail the division here. + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) + return cannotDivide(Numerator); + divide(SE, Diff, Denominator, &Q, &R); + if (R != Zero) + return cannotDivide(Numerator); + Quotient = Q; +} + +SCEVDivision::SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, + const SCEV *Denominator) + : SE(S), Denominator(Denominator) { + Zero = SE.getZero(Denominator->getType()); + One = SE.getOne(Denominator->getType()); + + // We generally do not know how to divide Expr by Denominator. We initialize + // the division to a "cannot divide" state to simplify the rest of the code. + cannotDivide(Numerator); +} + +// Convenience function for giving up on the division. We set the quotient to +// be equal to zero and the remainder to be equal to the numerator. 
+void SCEVDivision::cannotDivide(const SCEV *Numerator) { + Quotient = Zero; + Remainder = Numerator; +} diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp deleted file mode 100644 index dc5d02aa3a3cb..0000000000000 --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ /dev/null @@ -1,2452 +0,0 @@ -//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the scalar evolution expander, -// which is used to generate the code corresponding to a given scalar evolution -// expression. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace PatternMatch; - -/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, -/// reusing an existing cast if a suitable one exists, moving an existing -/// cast if a suitable one exists but isn't in the right place, or -/// creating a new one. -Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, - Instruction::CastOps Op, - BasicBlock::iterator IP) { - // This function must be called with the builder having a valid insertion - // point. It doesn't need to be the actual IP where the uses of the returned - // cast will be added, but it must dominate such IP. - // We use this precondition to produce a cast that will dominate all its - // uses. In particular, this is crucial for the case where the builder's - // insertion point *is* the point where we were asked to put the cast. - // Since we don't know the builder's insertion point is actually - // where the uses will be added (only that it dominates it), we are - // not allowed to move it. - BasicBlock::iterator BIP = Builder.GetInsertPoint(); - - Instruction *Ret = nullptr; - - // Check to see if there is already a cast! - for (User *U : V->users()) - if (U->getType() == Ty) - if (CastInst *CI = dyn_cast<CastInst>(U)) - if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, create a new cast at IP. - // Likewise, do not reuse a cast at BIP because it must dominate - // instructions that might be inserted before BIP. - if (BasicBlock::iterator(CI) != IP || BIP == IP) { - // Create a new cast, and leave the old cast in place in case - // it is being used as an insert point. - Ret = CastInst::Create(Op, V, Ty, "", &*IP); - Ret->takeName(CI); - CI->replaceAllUsesWith(Ret); - break; - } - Ret = CI; - break; - } - - // Create a new cast. 
- if (!Ret) - Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); - - // We assert at the end of the function since IP might point to an - // instruction with different dominance properties than a cast - // (an invoke for example) and not dominate BIP (but the cast does). - assert(SE.DT.dominates(Ret, &*BIP)); - - rememberInstruction(Ret); - return Ret; -} - -static BasicBlock::iterator findInsertPointAfter(Instruction *I, - BasicBlock *MustDominate) { - BasicBlock::iterator IP = ++I->getIterator(); - if (auto *II = dyn_cast<InvokeInst>(I)) - IP = II->getNormalDest()->begin(); - - while (isa<PHINode>(IP)) - ++IP; - - if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { - ++IP; - } else if (isa<CatchSwitchInst>(IP)) { - IP = MustDominate->getFirstInsertionPt(); - } else { - assert(!IP->isEHPad() && "unexpected eh pad!"); - } - - return IP; -} - -/// InsertNoopCastOfTo - Insert a cast of V to the specified type, -/// which must be possible with a noop cast, doing what we can to share -/// the casts. -Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { - Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); - assert((Op == Instruction::BitCast || - Op == Instruction::PtrToInt || - Op == Instruction::IntToPtr) && - "InsertNoopCastOfTo cannot perform non-noop casts!"); - assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && - "InsertNoopCastOfTo cannot change sizes!"); - - // Short-circuit unnecessary bitcasts. - if (Op == Instruction::BitCast) { - if (V->getType() == Ty) - return V; - if (CastInst *CI = dyn_cast<CastInst>(V)) { - if (CI->getOperand(0)->getType() == Ty) - return CI->getOperand(0); - } - } - // Short-circuit unnecessary inttoptr<->ptrtoint casts. - if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && - SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { - if (CastInst *CI = dyn_cast<CastInst>(V)) - if ((CI->getOpcode() == Instruction::PtrToInt || - CI->getOpcode() == Instruction::IntToPtr) && - SE.getTypeSizeInBits(CI->getType()) == - SE.getTypeSizeInBits(CI->getOperand(0)->getType())) - return CI->getOperand(0); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if ((CE->getOpcode() == Instruction::PtrToInt || - CE->getOpcode() == Instruction::IntToPtr) && - SE.getTypeSizeInBits(CE->getType()) == - SE.getTypeSizeInBits(CE->getOperand(0)->getType())) - return CE->getOperand(0); - } - - // Fold a cast of a constant. - if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getCast(Op, C, Ty); - - // Cast the argument at the beginning of the entry block, after - // any bitcasts of other arguments. - if (Argument *A = dyn_cast<Argument>(V)) { - BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); - while ((isa<BitCastInst>(IP) && - isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && - cast<BitCastInst>(IP)->getOperand(0) != A) || - isa<DbgInfoIntrinsic>(IP)) - ++IP; - return ReuseOrCreateCast(A, Ty, Op, IP); - } - - // Cast the instruction immediately after the instruction. - Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); - return ReuseOrCreateCast(I, Ty, Op, IP); -} - -/// InsertBinop - Insert the specified binary operator, doing a small amount -/// of work to avoid inserting an obviously redundant operation, and hoisting -/// to an outer loop when the opportunity is there and it is safe. 
-Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS, - SCEV::NoWrapFlags Flags, bool IsSafeToHoist) { - // Fold a binop with constant operands. - if (Constant *CLHS = dyn_cast<Constant>(LHS)) - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantExpr::get(Opcode, CLHS, CRHS); - - // Do a quick scan to see if we have this binop nearby. If so, reuse it. - unsigned ScanLimit = 6; - BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); - // Scanning starts from the last instruction before the insertion point. - BasicBlock::iterator IP = Builder.GetInsertPoint(); - if (IP != BlockBegin) { - --IP; - for (; ScanLimit; --IP, --ScanLimit) { - // Don't count dbg.value against the ScanLimit, to avoid perturbing the - // generated code. - if (isa<DbgInfoIntrinsic>(IP)) - ScanLimit++; - - auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) { - // Ensure that no-wrap flags match. - if (isa<OverflowingBinaryOperator>(I)) { - if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW)) - return true; - if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW)) - return true; - } - // Conservatively, do not use any instruction which has any of exact - // flags installed. - if (isa<PossiblyExactOperator>(I) && I->isExact()) - return true; - return false; - }; - if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && - IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP)) - return &*IP; - if (IP == BlockBegin) break; - } - } - - // Save the original insertion point so we can restore it when we're done. - DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); - SCEVInsertPointGuard Guard(Builder, this); - - if (IsSafeToHoist) { - // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { - if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) break; - - // Ok, move up a level. - Builder.SetInsertPoint(Preheader->getTerminator()); - } - } - - // If we haven't found this binop, insert it. - Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); - BO->setDebugLoc(Loc); - if (Flags & SCEV::FlagNUW) - BO->setHasNoUnsignedWrap(); - if (Flags & SCEV::FlagNSW) - BO->setHasNoSignedWrap(); - rememberInstruction(BO); - - return BO; -} - -/// FactorOutConstant - Test if S is divisible by Factor, using signed -/// division. If so, update S with Factor divided out and return true. -/// S need not be evenly divisible if a reasonable remainder can be -/// computed. -static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, - const SCEV *Factor, ScalarEvolution &SE, - const DataLayout &DL) { - // Everything is divisible by one. - if (Factor->isOne()) - return true; - - // x/x == 1. - if (S == Factor) { - S = SE.getConstant(S->getType(), 1); - return true; - } - - // For a Constant, check for a multiple of the given factor. - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { - // 0/x == 0. - if (C->isZero()) - return true; - // Check for divisibility. - if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { - ConstantInt *CI = - ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt())); - // If the quotient is zero and the remainder is non-zero, reject - // the value at this scale. It will be considered for subsequent - // smaller scales. 
- if (!CI->isZero()) { - const SCEV *Div = SE.getConstant(CI); - S = Div; - Remainder = SE.getAddExpr( - Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt()))); - return true; - } - } - } - - // In a Mul, check if there is a constant operand which is a multiple - // of the given factor. - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { - // Size is known, check if there is a constant operand which is a multiple - // of the given factor. If so, we can factor it. - const SCEVConstant *FC = cast<SCEVConstant>(Factor); - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) - if (!C->getAPInt().srem(FC->getAPInt())) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt())); - S = SE.getMulExpr(NewMulOps); - return true; - } - } - - // In an AddRec, check if both start and step are divisible. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV *Step = A->getStepRecurrence(SE); - const SCEV *StepRem = SE.getConstant(Step->getType(), 0); - if (!FactorOutConstant(Step, StepRem, Factor, SE, DL)) - return false; - if (!StepRem->isZero()) - return false; - const SCEV *Start = A->getStart(); - if (!FactorOutConstant(Start, Remainder, Factor, SE, DL)) - return false; - S = SE.getAddRecExpr(Start, Step, A->getLoop(), - A->getNoWrapFlags(SCEV::FlagNW)); - return true; - } - - return false; -} - -/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs -/// is the number of SCEVAddRecExprs present, which are kept at the end of -/// the list. -/// -static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, - Type *Ty, - ScalarEvolution &SE) { - unsigned NumAddRecs = 0; - for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) - ++NumAddRecs; - // Group Ops into non-addrecs and addrecs. - SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); - SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); - // Let ScalarEvolution sort and simplify the non-addrecs list. - const SCEV *Sum = NoAddRecs.empty() ? - SE.getConstant(Ty, 0) : - SE.getAddExpr(NoAddRecs); - // If it returned an add, use the operands. Otherwise it simplified - // the sum into a single value, so just use that. - Ops.clear(); - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) - Ops.append(Add->op_begin(), Add->op_end()); - else if (!Sum->isZero()) - Ops.push_back(Sum); - // Then append the addrecs. - Ops.append(AddRecs.begin(), AddRecs.end()); -} - -/// SplitAddRecs - Flatten a list of add operands, moving addrec start values -/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}. -/// This helps expose more opportunities for folding parts of the expressions -/// into GEP indices. -/// -static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, - Type *Ty, - ScalarEvolution &SE) { - // Find the addrecs. 
- SmallVector<const SCEV *, 8> AddRecs; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { - const SCEV *Start = A->getStart(); - if (Start->isZero()) break; - const SCEV *Zero = SE.getConstant(Ty, 0); - AddRecs.push_back(SE.getAddRecExpr(Zero, - A->getStepRecurrence(SE), - A->getLoop(), - A->getNoWrapFlags(SCEV::FlagNW))); - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { - Ops[i] = Zero; - Ops.append(Add->op_begin(), Add->op_end()); - e += Add->getNumOperands(); - } else { - Ops[i] = Start; - } - } - if (!AddRecs.empty()) { - // Add the addrecs onto the end of the list. - Ops.append(AddRecs.begin(), AddRecs.end()); - // Resort the operand list, moving any constants to the front. - SimplifyAddOperands(Ops, Ty, SE); - } -} - -/// expandAddToGEP - Expand an addition expression with a pointer type into -/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps -/// BasicAliasAnalysis and other passes analyze the result. See the rules -/// for getelementptr vs. inttoptr in -/// http://llvm.org/docs/LangRef.html#pointeraliasing -/// for details. -/// -/// Design note: The correctness of using getelementptr here depends on -/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as -/// they may introduce pointer arithmetic which may not be safely converted -/// into getelementptr. -/// -/// Design note: It might seem desirable for this function to be more -/// loop-aware. If some of the indices are loop-invariant while others -/// aren't, it might seem desirable to emit multiple GEPs, keeping the -/// loop-invariant portions of the overall computation outside the loop. -/// However, there are a few reasons this is not done here. Hoisting simple -/// arithmetic is a low-level optimization that often isn't very -/// important until late in the optimization process. In fact, passes -/// like InstructionCombining will combine GEPs, even if it means -/// pushing loop-invariant computation down into loops, so even if the -/// GEPs were split here, the work would quickly be undone. The -/// LoopStrengthReduction pass, which is usually run quite late (and -/// after the last InstructionCombining pass), takes care of hoisting -/// loop-invariant portions of expressions, after considering what -/// can be folded using target addressing modes. -/// -Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, - const SCEV *const *op_end, - PointerType *PTy, - Type *Ty, - Value *V) { - Type *OriginalElTy = PTy->getElementType(); - Type *ElTy = OriginalElTy; - SmallVector<Value *, 4> GepIndices; - SmallVector<const SCEV *, 8> Ops(op_begin, op_end); - bool AnyNonZeroIndices = false; - - // Split AddRecs up into parts as either of the parts may be usable - // without the other. - SplitAddRecs(Ops, Ty, SE); - - Type *IntIdxTy = DL.getIndexType(PTy); - - // Descend down the pointer's type and attempt to convert the other - // operands into GEP indices, at each level. The first index in a GEP - // indexes into the array implied by the pointer operand; the rest of - // the indices index into the element or field type selected by the - // preceding index. - for (;;) { - // If the scale size is not 0, attempt to factor out a scale for - // array indexing. 
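SplitAddRecs relies on the identity {Start,+,Step} == Start + {0,+,Step}: peeling the start value off lets it fold into a GEP base while the zero-based recurrence becomes an index. A toy check of that identity, with a plain struct standing in for SCEVAddRecExpr:

#include <cassert>
#include <cstdint>

// Toy affine recurrence: value at iteration i is Start + i * Step.
struct AddRec {
  int64_t Start;
  int64_t Step;
  int64_t at(int64_t i) const { return Start + i * Step; }
};

int main() {
  AddRec Orig{40, 8};     // {40,+,8}
  AddRec ZeroBased{0, 8}; // {0,+,8}
  int64_t Start = 40;     // the hoisted start value

  // {Start,+,Step} equals Start + {0,+,Step} at every iteration, which is
  // what lets the expander fold Start into a GEP base while the
  // loop-varying part becomes an index.
  for (int64_t i = 0; i < 16; ++i)
    assert(Orig.at(i) == Start + ZeroBased.at(i));
  return 0;
}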
- SmallVector<const SCEV *, 8> ScaledOps; - if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); - if (!ElSize->isZero()) { - SmallVector<const SCEV *, 8> NewOps; - for (const SCEV *Op : Ops) { - const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { - // Op now has ElSize factored out. - ScaledOps.push_back(Op); - if (!Remainder->isZero()) - NewOps.push_back(Remainder); - AnyNonZeroIndices = true; - } else { - // The operand was not divisible, so add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Op); - } - } - // If we made any changes, update Ops. - if (!ScaledOps.empty()) { - Ops = NewOps; - SimplifyAddOperands(Ops, Ty, SE); - } - } - } - - // Record the scaled array index for this level of the type. If - // we didn't find any operands that could be factored, tentatively - // assume that element zero was selected (since the zero offset - // would obviously be folded away). - Value *Scaled = ScaledOps.empty() ? - Constant::getNullValue(Ty) : - expandCodeFor(SE.getAddExpr(ScaledOps), Ty); - GepIndices.push_back(Scaled); - - // Collect struct field index operands. - while (StructType *STy = dyn_cast<StructType>(ElTy)) { - bool FoundFieldNo = false; - // An empty struct has no fields. - if (STy->getNumElements() == 0) break; - // Field offsets are known. See if a constant offset falls within any of - // the struct fields. - if (Ops.empty()) - break; - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) - if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *DL.getStructLayout(STy); - uint64_t FullOffset = C->getValue()->getZExtValue(); - if (FullOffset < SL.getSizeInBytes()) { - unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); - ElTy = STy->getTypeAtIndex(ElIdx); - Ops[0] = - SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); - AnyNonZeroIndices = true; - FoundFieldNo = true; - } - } - // If no struct field offsets were found, tentatively assume that - // field zero was selected (since the zero offset would obviously - // be folded away). - if (!FoundFieldNo) { - ElTy = STy->getTypeAtIndex(0u); - GepIndices.push_back( - Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); - } - } - - if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) - ElTy = ATy->getElementType(); - else - break; - } - - // If none of the operands were convertible to proper GEP indices, cast - // the base to i8* and do an ugly getelementptr with that. It's still - // better than ptrtoint+arithmetic+inttoptr at least. - if (!AnyNonZeroIndices) { - // Cast the base to i8*. - V = InsertNoopCastOfTo(V, - Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); - - assert(!isa<Instruction>(V) || - SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); - - // Expand the operands for a plain byte offset. - Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); - - // Fold a GEP with constant operands. - if (Constant *CLHS = dyn_cast<Constant>(V)) - if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), - CLHS, CRHS); - - // Do a quick scan to see if we have this GEP nearby. If so, reuse it. - unsigned ScanLimit = 6; - BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); - // Scanning starts from the last instruction before the insertion point. 
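When a constant byte offset lands inside a struct, the loop above asks the struct layout which field contains the offset and continues with the offset made relative to that field. A standalone sketch of that lookup over a table of field offsets; the example layout is made up for illustration:

#include <cassert>
#include <cstdint>
#include <vector>

// Find the index of the field whose byte range contains ByteOffset, given
// the fields' starting offsets in ascending order (field 0 at offset 0).
// This mirrors what StructLayout::getElementContainingOffset provides.
unsigned fieldContaining(const std::vector<uint64_t> &FieldOffsets,
                         uint64_t ByteOffset) {
  unsigned Idx = 0;
  while (Idx + 1 < FieldOffsets.size() && FieldOffsets[Idx + 1] <= ByteOffset)
    ++Idx;
  return Idx;
}

int main() {
  // Hypothetical layout of { i32, i32, double, i8 }: offsets 0, 4, 8, 16.
  std::vector<uint64_t> Offsets = {0, 4, 8, 16};

  unsigned Field = fieldContaining(Offsets, 10);
  uint64_t Residual = 10 - Offsets[Field];
  assert(Field == 2 && Residual == 2); // offset 10 falls inside the double at 8

  // The residual (2 here) is what the expander pushes back into Ops[0]
  // to be consumed at the next, smaller scale.
  return 0;
}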
- BasicBlock::iterator IP = Builder.GetInsertPoint(); - if (IP != BlockBegin) { - --IP; - for (; ScanLimit; --IP, --ScanLimit) { - // Don't count dbg.value against the ScanLimit, to avoid perturbing the - // generated code. - if (isa<DbgInfoIntrinsic>(IP)) - ScanLimit++; - if (IP->getOpcode() == Instruction::GetElementPtr && - IP->getOperand(0) == V && IP->getOperand(1) == Idx) - return &*IP; - if (IP == BlockBegin) break; - } - } - - // Save the original insertion point so we can restore it when we're done. - SCEVInsertPointGuard Guard(Builder, this); - - // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { - if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) break; - - // Ok, move up a level. - Builder.SetInsertPoint(Preheader->getTerminator()); - } - - // Emit a GEP. - Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); - rememberInstruction(GEP); - - return GEP; - } - - { - SCEVInsertPointGuard Guard(Builder, this); - - // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { - if (!L->isLoopInvariant(V)) break; - - bool AnyIndexNotLoopInvariant = any_of( - GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); }); - - if (AnyIndexNotLoopInvariant) - break; - - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) break; - - // Ok, move up a level. - Builder.SetInsertPoint(Preheader->getTerminator()); - } - - // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, - // because ScalarEvolution may have changed the address arithmetic to - // compute a value which is beyond the end of the allocated object. - Value *Casted = V; - if (V->getType() != PTy) - Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); - Ops.push_back(SE.getUnknown(GEP)); - rememberInstruction(GEP); - } - - return expand(SE.getAddExpr(Ops)); -} - -Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, - Value *V) { - const SCEV *const Ops[1] = {Op}; - return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V); -} - -/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for -/// SCEV expansion. If they are nested, this is the most nested. If they are -/// neighboring, pick the later. -static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, - DominatorTree &DT) { - if (!A) return B; - if (!B) return A; - if (A->contains(B)) return B; - if (B->contains(A)) return A; - if (DT.dominates(A->getHeader(), B->getHeader())) return B; - if (DT.dominates(B->getHeader(), A->getHeader())) return A; - return A; // Arbitrarily break the tie. -} - -/// getRelevantLoop - Get the most relevant loop associated with the given -/// expression, according to PickMostRelevantLoop. -const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { - // Test whether we've already computed the most relevant loop for this SCEV. - auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); - if (!Pair.second) - return Pair.first->second; - - if (isa<SCEVConstant>(S)) - // A constant has no relevant loops. 
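If no operand could be turned into a typed index, the fallback casts the base to i8* and applies one raw byte offset; the address computed is the same as a typed GEP would give, just without the type structure. A tiny demonstration of that equivalence in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  // A plain array standing in for some allocated object.
  int32_t Data[16] = {};

  // "Pretty" GEP: a typed index, implicitly scaled by sizeof(int32_t).
  int32_t *Typed = Data + 5;

  // "Ugly" GEP: cast the base to a byte pointer and add a raw byte offset,
  // which is what the i8* fallback emits when no typed index was found.
  int32_t *Bytes = reinterpret_cast<int32_t *>(
      reinterpret_cast<char *>(Data) + 5 * sizeof(int32_t));

  assert(Typed == Bytes); // both spell the same address
  return 0;
}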
- return nullptr; - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { - if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) - return Pair.first->second = SE.LI.getLoopFor(I->getParent()); - // A non-instruction has no relevant loops. - return nullptr; - } - if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { - const Loop *L = nullptr; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - L = AR->getLoop(); - for (const SCEV *Op : N->operands()) - L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); - return RelevantLoops[N] = L; - } - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { - const Loop *Result = getRelevantLoop(C->getOperand()); - return RelevantLoops[C] = Result; - } - if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const Loop *Result = PickMostRelevantLoop( - getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT); - return RelevantLoops[D] = Result; - } - llvm_unreachable("Unexpected SCEV type!"); -} - -namespace { - -/// LoopCompare - Compare loops by PickMostRelevantLoop. -class LoopCompare { - DominatorTree &DT; -public: - explicit LoopCompare(DominatorTree &dt) : DT(dt) {} - - bool operator()(std::pair<const Loop *, const SCEV *> LHS, - std::pair<const Loop *, const SCEV *> RHS) const { - // Keep pointer operands sorted at the end. - if (LHS.second->getType()->isPointerTy() != - RHS.second->getType()->isPointerTy()) - return LHS.second->getType()->isPointerTy(); - - // Compare loops with PickMostRelevantLoop. - if (LHS.first != RHS.first) - return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; - - // If one operand is a non-constant negative and the other is not, - // put the non-constant negative on the right so that a sub can - // be used instead of a negate and add. - if (LHS.second->isNonConstantNegative()) { - if (!RHS.second->isNonConstantNegative()) - return false; - } else if (RHS.second->isNonConstantNegative()) - return true; - - // Otherwise they are equivalent according to this comparison. - return false; - } -}; - -} - -Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - - // Collect all the add operands in a loop, along with their associated loops. - // Iterate in reverse so that constants are emitted last, all else equal, and - // so that pointer operands are inserted first, which the code below relies on - // to form more involved GEPs. - SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; - for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), - E(S->op_begin()); I != E; ++I) - OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); - - // Sort by loop. Use a stable sort so that constants follow non-constants and - // pointer operands precede non-pointer operands. - llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); - - // Emit instructions to add all the operands. Hoist as much as possible - // out of loops, and form meaningful getelementptrs where possible. - Value *Sum = nullptr; - for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { - const Loop *CurLoop = I->first; - const SCEV *Op = I->second; - if (!Sum) { - // This is the first operand. Just expand it. - Sum = expand(Op); - ++I; - } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { - // The running sum expression is a pointer. Try to form a getelementptr - // at this level with that as the base. 
- SmallVector<const SCEV *, 4> NewOps; - for (; I != E && I->first == CurLoop; ++I) { - // If the operand is SCEVUnknown and not instructions, peek through - // it, to enable more of it to be folded into the GEP. - const SCEV *X = I->second; - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X)) - if (!isa<Instruction>(U->getValue())) - X = SE.getSCEV(U->getValue()); - NewOps.push_back(X); - } - Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); - } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { - // The running sum is an integer, and there's a pointer at this level. - // Try to form a getelementptr. If the running sum is instructions, - // use a SCEVUnknown to avoid re-analyzing them. - SmallVector<const SCEV *, 4> NewOps; - NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) : - SE.getSCEV(Sum)); - for (++I; I != E && I->first == CurLoop; ++I) - NewOps.push_back(I->second); - Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); - } else if (Op->isNonConstantNegative()) { - // Instead of doing a negate and add, just do a subtract. - Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); - Sum = InsertNoopCastOfTo(Sum, Ty); - Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, - /*IsSafeToHoist*/ true); - ++I; - } else { - // A simple add. - Value *W = expandCodeFor(Op, Ty); - Sum = InsertNoopCastOfTo(Sum, Ty); - // Canonicalize a constant to the RHS. - if (isa<Constant>(Sum)) std::swap(Sum, W); - Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), - /*IsSafeToHoist*/ true); - ++I; - } - } - - return Sum; -} - -Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - - // Collect all the mul operands in a loop, along with their associated loops. - // Iterate in reverse so that constants are emitted last, all else equal. - SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; - for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), - E(S->op_begin()); I != E; ++I) - OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); - - // Sort by loop. Use a stable sort so that constants follow non-constants. - llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); - - // Emit instructions to mul all the operands. Hoist as much as possible - // out of loops. - Value *Prod = nullptr; - auto I = OpsAndLoops.begin(); - - // Expand the calculation of X pow N in the following manner: - // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: - // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). - const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { - auto E = I; - // Calculate how many times the same operand from the same loop is included - // into this power. - uint64_t Exponent = 0; - const uint64_t MaxExponent = UINT64_MAX >> 1; - // No one sane will ever try to calculate such huge exponents, but if we - // need this, we stop on UINT64_MAX / 2 because we need to exit the loop - // below when the power of 2 exceeds our Exponent, and we want it to be - // 1u << 31 at most to not deal with unsigned overflow. - while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { - ++Exponent; - ++E; - } - assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); - - // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them - // that are needed into the result. 
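ExpandOpBinPowN implements the decomposition described in the comment: write the exponent as a sum of powers of two, square repeatedly, and multiply in only the squarings whose bit is set. A self-contained version of the same scheme over plain integers rather than emitted IR:

#include <cassert>
#include <cstdint>

// Compute Base**Exponent by repeated squaring, multiplying in the powers
// Base^1, Base^2, Base^4, ... whose bit is set in Exponent. This has the
// same shape as ExpandOpBinPowN, just over uint64_t values.
uint64_t binPow(uint64_t Base, uint64_t Exponent) {
  uint64_t Result = 1;
  uint64_t P = Base; // current power: Base^(2^k)
  while (Exponent != 0) {
    if (Exponent & 1)
      Result *= P;
    P *= P;
    Exponent >>= 1;
  }
  return Result;
}

int main() {
  assert(binPow(3, 5) == 243); // 3^5 = 3^4 * 3^1
  assert(binPow(2, 10) == 1024);
  assert(binPow(7, 0) == 1);
  return 0;
}

The point of the scheme is that only about log2(N) multiplies are emitted for an exponent of N, instead of N-1 repeated multiplies.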
- Value *P = expandCodeFor(I->second, Ty); - Value *Result = nullptr; - if (Exponent & 1) - Result = P; - for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { - P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap, - /*IsSafeToHoist*/ true); - if (Exponent & BinExp) - Result = Result ? InsertBinop(Instruction::Mul, Result, P, - SCEV::FlagAnyWrap, - /*IsSafeToHoist*/ true) - : P; - } - - I = E; - assert(Result && "Nothing was expanded?"); - return Result; - }; - - while (I != OpsAndLoops.end()) { - if (!Prod) { - // This is the first operand. Just expand it. - Prod = ExpandOpBinPowN(); - } else if (I->second->isAllOnesValue()) { - // Instead of doing a multiply by negative one, just do a negate. - Prod = InsertNoopCastOfTo(Prod, Ty); - Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, - SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); - ++I; - } else { - // A simple mul. - Value *W = ExpandOpBinPowN(); - Prod = InsertNoopCastOfTo(Prod, Ty); - // Canonicalize a constant to the RHS. - if (isa<Constant>(Prod)) std::swap(Prod, W); - const APInt *RHS; - if (match(W, m_Power2(RHS))) { - // Canonicalize Prod*(1<<C) to Prod<<C. - assert(!Ty->isVectorTy() && "vector types are not SCEVable"); - auto NWFlags = S->getNoWrapFlags(); - // clear nsw flag if shl will produce poison value. - if (RHS->logBase2() == RHS->getBitWidth() - 1) - NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW); - Prod = InsertBinop(Instruction::Shl, Prod, - ConstantInt::get(Ty, RHS->logBase2()), NWFlags, - /*IsSafeToHoist*/ true); - } else { - Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(), - /*IsSafeToHoist*/ true); - } - } - } - - return Prod; -} - -Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - - Value *LHS = expandCodeFor(S->getLHS(), Ty); - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { - const APInt &RHS = SC->getAPInt(); - if (RHS.isPowerOf2()) - return InsertBinop(Instruction::LShr, LHS, - ConstantInt::get(Ty, RHS.logBase2()), - SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); - } - - Value *RHS = expandCodeFor(S->getRHS(), Ty); - return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, - /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); -} - -/// Move parts of Base into Rest to leave Base with the minimal -/// expression that provides a pointer operand suitable for a -/// GEP expansion. -static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, - ScalarEvolution &SE) { - while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { - Base = A->getStart(); - Rest = SE.getAddExpr(Rest, - SE.getAddRecExpr(SE.getConstant(A->getType(), 0), - A->getStepRecurrence(SE), - A->getLoop(), - A->getNoWrapFlags(SCEV::FlagNW))); - } - if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { - Base = A->getOperand(A->getNumOperands()-1); - SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); - NewAddOps.back() = Rest; - Rest = SE.getAddExpr(NewAddOps); - ExposePointerBase(Base, Rest, SE); - } -} - -/// Determine if this is a well-behaved chain of instructions leading back to -/// the PHI. If so, it may be reused by expanded expressions. -bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, - const Loop *L) { - if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || - (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) - return false; - // If any of the operands don't dominate the insert position, bail. 
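Both visitMulExpr and visitUDivExpr strength-reduce a power-of-two constant operand to a shift, using the constant's log base 2 as the shift amount. A short demonstration of the two equivalences relied on there:

#include <cassert>
#include <cstdint>

// Shift amount for a power of two, i.e. the value logBase2() returns.
unsigned log2PowerOfTwo(uint64_t C) {
  assert(C != 0 && (C & (C - 1)) == 0 && "expected a power of two");
  unsigned L = 0;
  while (C > 1) { C >>= 1; ++L; }
  return L;
}

int main() {
  uint64_t X = 123456;

  // Prod * (1 << C)  ==>  Prod << C
  assert(X * 8 == (X << log2PowerOfTwo(8)));

  // LHS udiv (1 << C)  ==>  LHS lshr C  (unsigned division only)
  assert(X / 32 == (X >> log2PowerOfTwo(32)));
  return 0;
}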
- // Addrec operands are always loop-invariant, so this can only happen - // if there are instructions which haven't been hoisted. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(OI)) - if (!SE.DT.dominates(OInst, IVIncInsertPos)) - return false; - } - // Advance to the next instruction. - IncV = dyn_cast<Instruction>(IncV->getOperand(0)); - if (!IncV) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - if (IncV == PN) - return true; - - return isNormalAddRecExprPHI(PN, IncV, L); -} - -/// getIVIncOperand returns an induction variable increment's induction -/// variable operand. -/// -/// If allowScale is set, any type of GEP is allowed as long as the nonIV -/// operands dominate InsertPos. -/// -/// If allowScale is not set, ensure that a GEP increment conforms to one of the -/// simple patterns generated by getAddRecExprPHILiterally and -/// expandAddtoGEP. If the pattern isn't recognized, return NULL. -Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, - Instruction *InsertPos, - bool allowScale) { - if (IncV == InsertPos) - return nullptr; - - switch (IncV->getOpcode()) { - default: - return nullptr; - // Check for a simple Add/Sub or GEP of a loop invariant step. - case Instruction::Add: - case Instruction::Sub: { - Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); - if (!OInst || SE.DT.dominates(OInst, InsertPos)) - return dyn_cast<Instruction>(IncV->getOperand(0)); - return nullptr; - } - case Instruction::BitCast: - return dyn_cast<Instruction>(IncV->getOperand(0)); - case Instruction::GetElementPtr: - for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { - if (isa<Constant>(*I)) - continue; - if (Instruction *OInst = dyn_cast<Instruction>(*I)) { - if (!SE.DT.dominates(OInst, InsertPos)) - return nullptr; - } - if (allowScale) { - // allow any kind of GEP as long as it can be hoisted. - continue; - } - // This must be a pointer addition of constants (pretty), which is already - // handled, or some number of address-size elements (ugly). Ugly geps - // have 2 operands. i1* is used by the expander to represent an - // address-size element. - if (IncV->getNumOperands() != 2) - return nullptr; - unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); - if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) - && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return nullptr; - break; - } - return dyn_cast<Instruction>(IncV->getOperand(0)); - } -} - -/// If the insert point of the current builder or any of the builders on the -/// stack of saved builders has 'I' as its insert point, update it to point to -/// the instruction after 'I'. This is intended to be used when the instruction -/// 'I' is being moved. If this fixup is not done and 'I' is moved to a -/// different block, the inconsistent insert point (with a mismatched -/// Instruction and Block) can lead to an instruction being inserted in a block -/// other than its parent. 
-void SCEVExpander::fixupInsertPoints(Instruction *I) { - BasicBlock::iterator It(*I); - BasicBlock::iterator NewInsertPt = std::next(It); - if (Builder.GetInsertPoint() == It) - Builder.SetInsertPoint(&*NewInsertPt); - for (auto *InsertPtGuard : InsertPointGuards) - if (InsertPtGuard->GetInsertPoint() == It) - InsertPtGuard->SetInsertPoint(NewInsertPt); -} - -/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make -/// it available to other uses in this loop. Recursively hoist any operands, -/// until we reach a value that dominates InsertPos. -bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { - if (SE.DT.dominates(IncV, InsertPos)) - return true; - - // InsertPos must itself dominate IncV so that IncV's new position satisfies - // its existing users. - if (isa<PHINode>(InsertPos) || - !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) - return false; - - // Check that the chain of IV operands leading back to Phi can be hoisted. - SmallVector<Instruction*, 4> IVIncs; - for(;;) { - Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); - if (!Oper) - return false; - // IncV is safe to hoist. - IVIncs.push_back(IncV); - IncV = Oper; - if (SE.DT.dominates(IncV, InsertPos)) - break; - } - for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { - fixupInsertPoints(*I); - (*I)->moveBefore(InsertPos); - } - return true; -} - -/// Determine if this cyclic phi is in a form that would have been generated by -/// LSR. We don't care if the phi was actually expanded in this pass, as long -/// as it is in a low-cost form, for example, no implied multiplication. This -/// should match any patterns generated by getAddRecExprPHILiterally and -/// expandAddtoGEP. -bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, - const Loop *L) { - for(Instruction *IVOper = IncV; - (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), - /*allowScale=*/false));) { - if (IVOper == PN) - return true; - } - return false; -} - -/// expandIVInc - Expand an IV increment at Builder's current InsertPos. -/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may -/// need to materialize IV increments elsewhere to handle difficult situations. -Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, - Type *ExpandTy, Type *IntTy, - bool useSubtract) { - Value *IncV; - // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (ExpandTy->isPointerTy()) { - PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); - // If the step isn't constant, don't use an implicitly scaled GEP, because - // that would require a multiply inside the loop. - if (!isa<ConstantInt>(StepV)) - GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), - GEPPtrTy->getAddressSpace()); - IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } - } else { - IncV = useSubtract ? - Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : - Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); - } - return IncV; -} - -/// Hoist the addrec instruction chain rooted in the loop phi above the -/// position. This routine assumes that this is possible (has been checked). 
-void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, - Instruction *Pos, PHINode *LoopPhi) { - do { - if (DT->dominates(InstToHoist, Pos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - fixupInsertPoints(InstToHoist); - InstToHoist->moveBefore(Pos); - Pos = InstToHoist; - InstToHoist = cast<Instruction>(InstToHoist->getOperand(0)); - } while (InstToHoist != LoopPhi); -} - -/// Check whether we can cheaply express the requested SCEV in terms of -/// the available PHI SCEV by truncation and/or inversion of the step. -static bool canBeCheaplyTransformed(ScalarEvolution &SE, - const SCEVAddRecExpr *Phi, - const SCEVAddRecExpr *Requested, - bool &InvertStep) { - Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); - Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); - - if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) - return false; - - // Try truncate it if necessary. - Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy)); - if (!Phi) - return false; - - // Check whether truncation will help. - if (Phi == Requested) { - InvertStep = false; - return true; - } - - // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. - if (SE.getAddExpr(Requested->getStart(), - SE.getNegativeSCEV(Requested)) == Phi) { - InvertStep = true; - return true; - } - - return false; -} - -static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { - if (!isa<IntegerType>(AR->getType())) - return false; - - unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); - Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); - const SCEV *Step = AR->getStepRecurrence(SE); - const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), - SE.getSignExtendExpr(AR, WideTy)); - const SCEV *ExtendAfterOp = - SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); - return ExtendAfterOp == OpAfterExtend; -} - -static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { - if (!isa<IntegerType>(AR->getType())) - return false; - - unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); - Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); - const SCEV *Step = AR->getStepRecurrence(SE); - const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), - SE.getZeroExtendExpr(AR, WideTy)); - const SCEV *ExtendAfterOp = - SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); - return ExtendAfterOp == OpAfterExtend; -} - -/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand -/// the base addrec, which is the addrec without any non-loop-dominating -/// values, and return the PHI. -PHINode * -SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, - const Loop *L, - Type *ExpandTy, - Type *IntTy, - Type *&TruncTy, - bool &InvertStep) { - assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); - - // Reuse a previously-inserted PHI, if present. - BasicBlock *LatchBlock = L->getLoopLatch(); - if (LatchBlock) { - PHINode *AddRecPhiMatch = nullptr; - Instruction *IncV = nullptr; - TruncTy = nullptr; - InvertStep = false; - - // Only try partially matching scevs that need truncation and/or - // step-inversion if we know this loop is outside the current loop. 
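IsIncrementNSW and IsIncrementNUW prove that the increment cannot wrap by redoing the addition in a type twice as wide: if extending the operands and then adding gives the same value as adding first and then extending, the narrow addition did not overflow. The same check on concrete 8-bit values; the helper below only illustrates the idea:

#include <cassert>
#include <cstdint>

// Does A + Step stay in range as a signed 8-bit addition? Compare the result
// of "extend, then add" against "add, then extend" in a 16-bit type, exactly
// the comparison IsIncrementNSW performs on SCEVs.
bool addNoSignedWrap8(int8_t A, int8_t Step) {
  int16_t OpAfterExtend = int16_t(A) + int16_t(Step);
  int16_t ExtendAfterOp = int16_t(int8_t(A + Step)); // wraps modulo 2^8 first
  return OpAfterExtend == ExtendAfterOp;
}

int main() {
  assert(addNoSignedWrap8(100, 20));   // 120 fits in int8_t
  assert(!addNoSignedWrap8(100, 60));  // 160 wraps to -96 in 8 bits
  assert(addNoSignedWrap8(-100, -28)); // -128 still fits
  return 0;
}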
- bool TryNonMatchingSCEV = - IVIncInsertLoop && - SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); - - for (PHINode &PN : L->getHeader()->phis()) { - if (!SE.isSCEVable(PN.getType())) - continue; - - const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); - if (!PhiSCEV) - continue; - - bool IsMatchingSCEV = PhiSCEV == Normalized; - // We only handle truncation and inversion of phi recurrences for the - // expanded expression if the expanded expression's loop dominates the - // loop we insert to. Check now, so we can bail out early. - if (!IsMatchingSCEV && !TryNonMatchingSCEV) - continue; - - // TODO: this possibly can be reworked to avoid this cast at all. - Instruction *TempIncV = - dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock)); - if (!TempIncV) - continue; - - // Check whether we can reuse this PHI node. - if (LSRMode) { - if (!isExpandedAddRecExprPHI(&PN, TempIncV, L)) - continue; - if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos)) - continue; - } else { - if (!isNormalAddRecExprPHI(&PN, TempIncV, L)) - continue; - } - - // Stop if we have found an exact match SCEV. - if (IsMatchingSCEV) { - IncV = TempIncV; - TruncTy = nullptr; - InvertStep = false; - AddRecPhiMatch = &PN; - break; - } - - // Try whether the phi can be translated into the requested form - // (truncated and/or offset by a constant). - if ((!TruncTy || InvertStep) && - canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { - // Record the phi node. But don't stop we might find an exact match - // later. - AddRecPhiMatch = &PN; - IncV = TempIncV; - TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); - } - } - - if (AddRecPhiMatch) { - // Potentially, move the increment. We have made sure in - // isExpandedAddRecExprPHI or hoistIVInc that this is possible. - if (L == IVIncInsertLoop) - hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); - - // Ok, the add recurrence looks usable. - // Remember this PHI, even in post-inc mode. - InsertedValues.insert(AddRecPhiMatch); - // Remember the increment. - rememberInstruction(IncV); - return AddRecPhiMatch; - } - } - - // Save the original insertion point so we can restore it when we're done. - SCEVInsertPointGuard Guard(Builder, this); - - // Another AddRec may need to be recursively expanded below. For example, if - // this AddRec is quadratic, the StepV may itself be an AddRec in this - // loop. Remove this loop from the PostIncLoops set before expanding such - // AddRecs. Otherwise, we cannot find a valid position for the step - // (i.e. StepV can never dominate its loop header). Ideally, we could do - // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, - // so it's not worth implementing SmallPtrSet::swap. - PostIncLoopSet SavedPostIncLoops = PostIncLoops; - PostIncLoops.clear(); - - // Expand code for the start value into the loop preheader. - assert(L->getLoopPreheader() && - "Can't expand add recurrences without a loop preheader!"); - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getLoopPreheader()->getTerminator()); - - // StartV must have been be inserted into L's preheader to dominate the new - // phi. - assert(!isa<Instruction>(StartV) || - SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(), - L->getHeader())); - - // Expand code for the step value. Do this before creating the PHI so that PHI - // reuse code doesn't see an incomplete PHI. 
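The InvertStep path reuses an existing phi whose recurrence equals the requested start value minus the requested recurrence; the requested value is then recovered as Start minus the reused phi instead of materialising a new induction variable. A numeric check of that identity with toy recurrences:

#include <cassert>
#include <cstdint>

// Toy affine recurrence: value at iteration i is Start + i * Step.
struct AddRec {
  int64_t Start, Step;
  int64_t at(int64_t i) const { return Start + i * Step; }
};

int main() {
  // The recurrence we want to expand.
  AddRec Requested{5, 3}; // {5,+,3}: 5, 8, 11, ...
  // An existing phi that satisfies Phi == Requested.Start - Requested.
  AddRec Phi{0, -3};      // {0,+,-3}: 0, -3, -6, ...

  // With InvertStep the expander emits "Requested.Start - Phi" rather than
  // a brand new induction variable.
  for (int64_t i = 0; i < 8; ++i)
    assert(Requested.at(i) == Requested.Start - Phi.at(i));
  return 0;
}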
- const SCEV *Step = Normalized->getStepRecurrence(SE); - // If the stride is negative, insert a sub instead of an add for the increment - // (unless it's a constant, because subtracts of constants are canonicalized - // to adds). - bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); - if (useSubtract) - Step = SE.getNegativeSCEV(Step); - // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); - - // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if - // we actually do emit an addition. It does not apply if we emit a - // subtraction. - bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); - bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); - - // Create the PHI. - BasicBlock *Header = L->getHeader(); - Builder.SetInsertPoint(Header, Header->begin()); - pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); - PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), - Twine(IVName) + ".iv"); - rememberInstruction(PN); - - // Create the step instructions and populate the PHI. - for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { - BasicBlock *Pred = *HPI; - - // Add a start value. - if (!L->contains(Pred)) { - PN->addIncoming(StartV, Pred); - continue; - } - - // Create a step value and add it to the PHI. - // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the - // instructions at IVIncInsertPos. - Instruction *InsertPos = L == IVIncInsertLoop ? - IVIncInsertPos : Pred->getTerminator(); - Builder.SetInsertPoint(InsertPos); - Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); - - if (isa<OverflowingBinaryOperator>(IncV)) { - if (IncrementIsNUW) - cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); - if (IncrementIsNSW) - cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); - } - PN->addIncoming(IncV, Pred); - } - - // After expanding subexpressions, restore the PostIncLoops set so the caller - // can ensure that IVIncrement dominates the current uses. - PostIncLoops = SavedPostIncLoops; - - // Remember this PHI, even in post-inc mode. - InsertedValues.insert(PN); - - return PN; -} - -Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { - Type *STy = S->getType(); - Type *IntTy = SE.getEffectiveSCEVType(STy); - const Loop *L = S->getLoop(); - - // Determine a normalized form of this expression, which is the expression - // before any post-inc adjustment is made. - const SCEVAddRecExpr *Normalized = S; - if (PostIncLoops.count(L)) { - PostIncLoopSet Loops; - Loops.insert(L); - Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE)); - } - - // Strip off any non-loop-dominating component from the addrec start. - const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = nullptr; - if (!SE.properlyDominates(Start, L->getHeader())) { - PostLoopOffset = Start; - Start = SE.getConstant(Normalized->getType(), 0); - Normalized = cast<SCEVAddRecExpr>( - SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), - Normalized->getLoop(), - Normalized->getNoWrapFlags(SCEV::FlagNW))); - } - - // Strip off any non-loop-dominating component from the addrec step. 
- const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = nullptr; - if (!SE.dominates(Step, L->getHeader())) { - PostLoopScale = Step; - Step = SE.getConstant(Normalized->getType(), 1); - if (!Start->isZero()) { - // The normalization below assumes that Start is constant zero, so if - // it isn't re-associate Start to PostLoopOffset. - assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?"); - PostLoopOffset = Start; - Start = SE.getConstant(Normalized->getType(), 0); - } - Normalized = - cast<SCEVAddRecExpr>(SE.getAddRecExpr( - Start, Step, Normalized->getLoop(), - Normalized->getNoWrapFlags(SCEV::FlagNW))); - } - - // Expand the core addrec. If we need post-loop scaling, force it to - // expand to an integer type to avoid the need for additional casting. - Type *ExpandTy = PostLoopScale ? IntTy : STy; - // We can't use a pointer type for the addrec if the pointer type is - // non-integral. - Type *AddRecPHIExpandTy = - DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; - - // In some cases, we decide to reuse an existing phi node but need to truncate - // it and/or invert the step. - Type *TruncTy = nullptr; - bool InvertStep = false; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, - IntTy, TruncTy, InvertStep); - - // Accommodate post-inc mode, if necessary. - Value *Result; - if (!PostIncLoops.count(L)) - Result = PN; - else { - // In PostInc mode, use the post-incremented value. - BasicBlock *LatchBlock = L->getLoopLatch(); - assert(LatchBlock && "PostInc mode requires a unique loop latch!"); - Result = PN->getIncomingValueForBlock(LatchBlock); - - // For an expansion to use the postinc form, the client must call - // expandCodeFor with an InsertPoint that is either outside the PostIncLoop - // or dominated by IVIncInsertPos. - if (isa<Instruction>(Result) && - !SE.DT.dominates(cast<Instruction>(Result), - &*Builder.GetInsertPoint())) { - // The induction variable's postinc expansion does not dominate this use. - // IVUsers tries to prevent this case, so it is rare. However, it can - // happen when an IVUser outside the loop is not dominated by the latch - // block. Adjusting IVIncInsertPos before expansion begins cannot handle - // all cases. Consider a phi outside whose operand is replaced during - // expansion with the value of the postinc user. Without fundamentally - // changing the way postinc users are tracked, the only remedy is - // inserting an extra IV increment. StepV might fold into PostLoopOffset, - // but hopefully expandCodeFor handles that. - bool useSubtract = - !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); - if (useSubtract) - Step = SE.getNegativeSCEV(Step); - Value *StepV; - { - // Expand the step somewhere that dominates the loop header. - SCEVInsertPointGuard Guard(Builder, this); - StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); - } - Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); - } - } - - // We have decided to reuse an induction variable of a dominating loop. Apply - // truncation and/or inversion of the step. - if (TruncTy) { - Type *ResTy = Result->getType(); - // Normalize the result type. - if (ResTy != SE.getEffectiveSCEVType(ResTy)) - Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); - // Truncate the result. - if (TruncTy != Result->getType()) { - Result = Builder.CreateTrunc(Result, TruncTy); - rememberInstruction(Result); - } - // Invert the result. 
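When the start or the step of the addrec does not dominate the loop header, it is peeled off as PostLoopOffset or PostLoopScale and only a core {0,+,1}-style recurrence is expanded inside the loop; the offset and scale are reapplied afterwards. The underlying identity, checked on plain integers:

#include <cassert>
#include <cstdint>

int main() {
  // Imagine neither Start nor Step dominates the loop header, so the
  // expander peels them off as PostLoopOffset / PostLoopScale and only the
  // core {0,+,1} recurrence lives in the loop.
  int64_t Start = 17, Step = 5;

  int64_t Direct = Start; // {Start,+,Step} maintained as its own recurrence
  int64_t Core = 0;       // {0,+,1}
  for (int64_t i = 0; i < 10; ++i) {
    // PostLoopOffset + PostLoopScale * {0,+,1} reproduces the original value.
    assert(Direct == Start + Step * Core);
    Direct += Step;
    ++Core;
  }
  return 0;
}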
- if (InvertStep) { - Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy), - Result); - rememberInstruction(Result); - } - } - - // Re-apply any non-loop-dominating scale. - if (PostLoopScale) { - assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); - Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateMul(Result, - expandCodeFor(PostLoopScale, IntTy)); - rememberInstruction(Result); - } - - // Re-apply any non-loop-dominating offset. - if (PostLoopOffset) { - if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { - if (Result->getType()->isIntegerTy()) { - Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); - Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base); - } else { - Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result); - } - } else { - Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateAdd(Result, - expandCodeFor(PostLoopOffset, IntTy)); - rememberInstruction(Result); - } - } - - return Result; -} - -Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - // In canonical mode we compute the addrec as an expression of a canonical IV - // using evaluateAtIteration and expand the resulting SCEV expression. This - // way we avoid introducing new IVs to carry on the comutation of the addrec - // throughout the loop. - // - // For nested addrecs evaluateAtIteration might need a canonical IV of a - // type wider than the addrec itself. Emitting a canonical IV of the - // proper type might produce non-legal types, for example expanding an i64 - // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall - // back to non-canonical mode for nested addrecs. - if (!CanonicalMode || (S->getNumOperands() > 2)) - return expandAddRecExprLiterally(S); - - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - const Loop *L = S->getLoop(); - - // First check for an existing canonical IV in a suitable type. - PHINode *CanonicalIV = nullptr; - if (PHINode *PN = L->getCanonicalInductionVariable()) - if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) - CanonicalIV = PN; - - // Rewrite an AddRec in terms of the canonical induction variable, if - // its type is more narrow. - if (CanonicalIV && - SE.getTypeSizeInBits(CanonicalIV->getType()) > - SE.getTypeSizeInBits(Ty)) { - SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); - for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) - NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), - S->getNoWrapFlags(SCEV::FlagNW))); - BasicBlock::iterator NewInsertPt = - findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock()); - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - &*NewInsertPt); - return V; - } - - // {X,+,F} --> X + {0,+,F} - if (!S->getStart()->isZero()) { - SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); - NewOps[0] = SE.getConstant(Ty, 0); - const SCEV *Rest = SE.getAddRecExpr(NewOps, L, - S->getNoWrapFlags(SCEV::FlagNW)); - - // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the - // comments on expandAddToGEP for details. - const SCEV *Base = S->getStart(); - // Dig into the expression to find the pointer base for a GEP. - const SCEV *ExposedRest = Rest; - ExposePointerBase(Base, ExposedRest, SE); - // If we found a pointer, expand the AddRec with a GEP. 
- if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { - // Make sure the Base isn't something exotic, such as a multiplied - // or divided pointer value. In those cases, the result type isn't - // actually a pointer type. - if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { - Value *StartV = expand(Base); - assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); - return expandAddToGEP(ExposedRest, PTy, Ty, StartV); - } - } - - // Just do a normal add. Pre-expand the operands to suppress folding. - // - // The LHS and RHS values are factored out of the expand call to make the - // output independent of the argument evaluation order. - const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart())); - const SCEV *AddExprRHS = SE.getUnknown(expand(Rest)); - return expand(SE.getAddExpr(AddExprLHS, AddExprRHS)); - } - - // If we don't yet have a canonical IV, create one. - if (!CanonicalIV) { - // Create and insert the PHI node for the induction variable in the - // specified loop. - BasicBlock *Header = L->getHeader(); - pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); - CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - &Header->front()); - rememberInstruction(CanonicalIV); - - SmallSet<BasicBlock *, 4> PredSeen; - Constant *One = ConstantInt::get(Ty, 1); - for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { - BasicBlock *HP = *HPI; - if (!PredSeen.insert(HP).second) { - // There must be an incoming value for each predecessor, even the - // duplicates! - CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); - continue; - } - - if (L->contains(HP)) { - // Insert a unit add instruction right before the terminator - // corresponding to the back-edge. - Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, - "indvar.next", - HP->getTerminator()); - Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); - rememberInstruction(Add); - CanonicalIV->addIncoming(Add, HP); - } else { - CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); - } - } - } - - // {0,+,1} --> Insert a canonical induction variable into the loop! - if (S->isAffine() && S->getOperand(1)->isOne()) { - assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && - "IVs with types different from the canonical IV should " - "already have been handled!"); - return CanonicalIV; - } - - // {0,+,F} --> {0,+,1} * F - - // If this is a simple linear addrec, emit it now as a special case. - if (S->isAffine()) // {0,+,F} --> i*F - return - expand(SE.getTruncateOrNoop( - SE.getMulExpr(SE.getUnknown(CanonicalIV), - SE.getNoopOrAnyExtend(S->getOperand(1), - CanonicalIV->getType())), - Ty)); - - // If this is a chain of recurrences, turn it into a closed form, using the - // folders, then expandCodeFor the closed form. This allows the folders to - // simplify the expression without having to build a bunch of special code - // into this folder. - const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. - - // Promote S up to the canonical IV type, if the cast is foldable. - const SCEV *NewS = S; - const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); - if (isa<SCEVAddRecExpr>(Ext)) - NewS = Ext; - - const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); - //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; - - // Truncate the result down to the original type, if needed. 
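evaluateAtIteration turns the chain of recurrences into a closed form in the canonical IV; for a quadratic addrec {A,+,B,+,D} the value after n iterations is A + B*n + D*n*(n-1)/2, following the standard binomial-coefficient evaluation of chains of recurrences. A quick numeric check of that closed form:

#include <cassert>
#include <cstdint>

// Value of the quadratic recurrence {A,+,B,+,D} after n iterations, using
// the binomial-coefficient closed form (the shape evaluateAtIteration
// produces for a two-step addrec):
//   value(n) = A + B*n + D*n*(n-1)/2
int64_t evalQuadratic(int64_t A, int64_t B, int64_t D, int64_t n) {
  return A + B * n + D * (n * (n - 1) / 2);
}

int main() {
  // {0,+,1,+,1}: 0, 1, 3, 6, 10, ... (triangular numbers).
  int64_t Val = 0, Step = 1;
  for (int64_t n = 0; n < 12; ++n) {
    assert(Val == evalQuadratic(0, 1, 1, n));
    Val += Step; // apply the outer recurrence
    Step += 1;   // the step itself follows {1,+,1}
  }
  return 0;
}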
- const SCEV *T = SE.getTruncateOrNoop(V, Ty); - return expand(T); -} - -Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateTrunc(V, Ty); - rememberInstruction(I); - return I; -} - -Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateZExt(V, Ty); - rememberInstruction(I); - return I; -} - -Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateSExt(V, Ty); - rememberInstruction(I); - return I; -} - -Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); - rememberInstruction(Sel); - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); - rememberInstruction(Sel); - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands() - 2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. 
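visitSMaxExpr and its umax/smin/umin siblings expand an n-ary min or max as a right-to-left chain of compare plus select, starting from the last operand. The same folding over plain integers, here for signed max:

#include <cassert>
#include <vector>

// Expand smax(ops...) as a chain of compare+select, processing operands
// right to left the way visitSMaxExpr walks them.
int expandSMax(const std::vector<int> &Ops) {
  int LHS = Ops.back();
  for (int i = static_cast<int>(Ops.size()) - 2; i >= 0; --i) {
    int RHS = Ops[i];
    LHS = (LHS > RHS) ? LHS : RHS; // icmp sgt + select, i.e. "smax"
  }
  return LHS;
}

int main() {
  assert(expandSMax({3, -7, 12, 5}) == 12);
  assert(expandSMax({-4}) == -4);
  return 0;
}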
- Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); - rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); - rememberInstruction(Sel); - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); - Type *Ty = LHS->getType(); - for (int i = S->getNumOperands() - 2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. - Type *OpTy = S->getOperand(i)->getType(); - if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpULT(LHS, RHS); - rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); - rememberInstruction(Sel); - LHS = Sel; - } - // In the case of mixed integer and pointer types, cast the - // final result back to the pointer type. - if (LHS->getType() != S->getType()) - LHS = InsertNoopCastOfTo(LHS, S->getType()); - return LHS; -} - -Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *IP) { - setInsertPoint(IP); - return expandCodeFor(SH, Ty); -} - -Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { - // Expand the code for this SCEV. - Value *V = expand(SH); - if (Ty) { - assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && - "non-trivial casts should be done with the SCEVs directly!"); - V = InsertNoopCastOfTo(V, Ty); - } - return V; -} - -ScalarEvolution::ValueOffsetPair -SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { - SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S); - // If the expansion is not in CanonicalMode, and the SCEV contains any - // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. - if (CanonicalMode || !SE.containsAddRecurrence(S)) { - // If S is scConstant, it may be worse to reuse an existing Value. - if (S->getSCEVType() != scConstant && Set) { - // Choose a Value from the set which dominates the insertPt. - // insertPt should be inside the Value's parent loop so as not to break - // the LCSSA form. - for (auto const &VOPair : *Set) { - Value *V = VOPair.first; - ConstantInt *Offset = VOPair.second; - Instruction *EntInst = nullptr; - if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) && - S->getType() == V->getType() && - EntInst->getFunction() == InsertPt->getFunction() && - SE.DT.dominates(EntInst, InsertPt) && - (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) - return {V, Offset}; - } - } - } - return {nullptr, nullptr}; -} - -// The expansion of SCEV will either reuse a previous Value in ExprValueMap, -// or expand the SCEV literally. Specifically, if the expansion is in LSRMode, -// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded -// literally, to prevent LSR's transformed SCEV from being reverted. 
Otherwise, -// the expansion will try to reuse Value from ExprValueMap, and only when it -// fails, expand the SCEV literally. -Value *SCEVExpander::expand(const SCEV *S) { - // Compute an insertion point for this SCEV object. Hoist the instructions - // as far out in the loop nest as possible. - Instruction *InsertPt = &*Builder.GetInsertPoint(); - - // We can move insertion point only if there is no div or rem operations - // otherwise we are risky to move it over the check for zero denominator. - auto SafeToHoist = [](const SCEV *S) { - return !SCEVExprContains(S, [](const SCEV *S) { - if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) { - if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS())) - // Division by non-zero constants can be hoisted. - return SC->getValue()->isZero(); - // All other divisions should not be moved as they may be - // divisions by zero and should be kept within the - // conditions of the surrounding loops that guard their - // execution (see PR35406). - return true; - } - return false; - }); - }; - if (SafeToHoist(S)) { - for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; - L = L->getParentLoop()) { - if (SE.isLoopInvariant(S, L)) { - if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) - InsertPt = Preheader->getTerminator(); - else - // LSR sets the insertion point for AddRec start/step values to the - // block start to simplify value reuse, even though it's an invalid - // position. SCEVExpander must correct for this in all cases. - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - } else { - // If the SCEV is computable at this level, insert it into the header - // after the PHIs (and after any other instructions that we've inserted - // there) so that it is guaranteed to dominate any user inside the loop. - if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - while (InsertPt->getIterator() != Builder.GetInsertPoint() && - (isInsertedInstruction(InsertPt) || - isa<DbgInfoIntrinsic>(InsertPt))) - InsertPt = &*std::next(InsertPt->getIterator()); - break; - } - } - } - - // IndVarSimplify sometimes sets the insertion point at the block start, even - // when there are PHIs at that point. We must correct for this. - if (isa<PHINode>(*InsertPt)) - InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); - - // Check to see if we already expanded this here. - auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); - if (I != InsertedExpressions.end()) - return I->second; - - SCEVInsertPointGuard Guard(Builder, this); - Builder.SetInsertPoint(InsertPt); - - // Expand the expression into instructions. 
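SafeToHoist refuses to move an expression containing a division above its original position unless the divisor is a non-zero constant, because hoisting could lift the division out from under the zero check that guards it (the PR35406 case cited above). A minimal sketch of that predicate over a toy expression tree; the Expr node type is illustrative, not a SCEV class:

#include <cassert>

// Toy expression node (illustrative, not a SCEV class).
struct Expr {
  enum Kind { Const, Add, UDiv } K;
  long long Value = 0;       // used when K == Const
  const Expr *LHS = nullptr; // used when K == Add or K == UDiv
  const Expr *RHS = nullptr;
};

// Mirrors SafeToHoist: an expression may be moved above its original
// position only if every division in it has a non-zero constant divisor;
// any other division must stay under the zero checks guarding it.
bool safeToHoist(const Expr *E) {
  if (!E)
    return true;
  if (E->K == Expr::UDiv &&
      !(E->RHS && E->RHS->K == Expr::Const && E->RHS->Value != 0))
    return false;
  return safeToHoist(E->LHS) && safeToHoist(E->RHS);
}

int main() {
  Expr C8{Expr::Const, 8};
  Expr C0{Expr::Const, 0};
  Expr X{Expr::Const, 100}; // stands in for an arbitrary operand

  Expr DivBy8{Expr::UDiv, 0, &X, &C8};
  Expr DivBy0{Expr::UDiv, 0, &X, &C0};

  assert(safeToHoist(&DivBy8));  // constant non-zero divisor: hoistable
  assert(!safeToHoist(&DivBy0)); // could fault: keep it where it is
  return 0;
}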
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt); - Value *V = VO.first; - - if (!V) - V = visit(S); - else if (VO.second) { - if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { - Type *Ety = Vty->getPointerElementType(); - int64_t Offset = VO.second->getSExtValue(); - int64_t ESize = SE.getTypeSizeInBits(Ety); - if ((Offset * 8) % ESize == 0) { - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize); - V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); - } else { - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); - } - } else { - V = Builder.CreateSub(V, VO.second); - } - } - // Remember the expanded value for this SCEV at this location. - // - // This is independent of PostIncLoops. The mapped value simply materializes - // the expression at this insertion point. If the mapped value happened to be - // a postinc expansion, it could be reused by a non-postinc user, but only if - // its insertion point was already at the head of the loop. - InsertedExpressions[std::make_pair(S, InsertPt)] = V; - return V; -} - -void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoops.empty()) - InsertedPostIncValues.insert(I); - else - InsertedValues.insert(I); -} - -/// getOrInsertCanonicalInductionVariable - This method returns the -/// canonical induction variable of the specified type for the specified -/// loop (inserting one if there is none). A canonical induction variable -/// starts at zero and steps by one on each iteration. -PHINode * -SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, - Type *Ty) { - assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); - - // Build a SCEV for {0,+,1}<L>. - // Conservatively use FlagAnyWrap for now. - const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), - SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); - - // Emit code for it. - SCEVInsertPointGuard Guard(Builder, this); - PHINode *V = - cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front())); - - return V; -} - -/// replaceCongruentIVs - Check for congruent phis in this loop header and -/// replace them with their most canonical representative. Return the number of -/// phis eliminated. -/// -/// This does not depend on any SCEVExpander state but should be used in -/// the same context that SCEVExpander is used. -unsigned -SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakTrackingVH> &DeadInsts, - const TargetTransformInfo *TTI) { - // Find integer phis in order of increasing width. - SmallVector<PHINode*, 8> Phis; - for (PHINode &PN : L->getHeader()->phis()) - Phis.push_back(&PN); - - if (TTI) - llvm::sort(Phis, [](Value *LHS, Value *RHS) { - // Put pointers at the back and make sure pointer < pointer = false. - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); - return RHS->getType()->getPrimitiveSizeInBits() < - LHS->getType()->getPrimitiveSizeInBits(); - }); - - unsigned NumElim = 0; - DenseMap<const SCEV *, PHINode *> ExprToIVMap; - // Process phis from wide to narrow. Map wide phis to their truncation - // so narrow phis can reuse them. 
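A standalone sketch of the offset-adjustment decision at the top of this hunk: when a cached value materializes S plus a constant byte offset, the expander steps it back with an element-typed GEP if the offset is a whole number of elements, and otherwise goes through an i8* "uglygep". Plain integers stand in for the IR values; this only illustrates the divisibility check:

#include <cstdint>

// Standalone sketch, not LLVM code.
struct GepPlan {
  bool ElementTyped; // true: "gep ElemTy, V, Index"; false: i8 "uglygep"
  int64_t Index;     // GEP index; negative, to step back toward the base
};

// OffsetBytes: cached value == wanted SCEV + OffsetBytes.
// ElemBits:    size of the pointee type in bits.
GepPlan planOffsetGep(int64_t OffsetBytes, int64_t ElemBits) {
  if ((OffsetBytes * 8) % ElemBits == 0)
    return {true, -(OffsetBytes * 8) / ElemBits}; // whole elements
  return {false, -OffsetBytes};                   // byte-wise adjustment
}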
- for (PHINode *Phi : Phis) { - auto SimplifyPHINode = [&](PHINode *PN) -> Value * { - if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) - return V; - if (!SE.isSCEVable(PN->getType())) - return nullptr; - auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); - if (!Const) - return nullptr; - return Const->getValue(); - }; - - // Fold constant phis. They may be congruent to other constant phis and - // would confuse the logic below that expects proper IVs. - if (Value *V = SimplifyPHINode(Phi)) { - if (V->getType() != Phi->getType()) - continue; - Phi->replaceAllUsesWith(V); - DeadInsts.emplace_back(Phi); - ++NumElim; - DEBUG_WITH_TYPE(DebugType, dbgs() - << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); - continue; - } - - if (!SE.isSCEVable(Phi->getType())) - continue; - - PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; - if (!OrigPhiRef) { - OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TTI && - TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { - // This phi can be freely truncated to the narrowest phi type. Map the - // truncated expression to it so it will be reused for narrow types. - const SCEV *TruncExpr = - SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); - ExprToIVMap[TruncExpr] = Phi; - } - continue; - } - - // Replacing a pointer phi with an integer phi or vice-versa doesn't make - // sense. - if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) - continue; - - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - Instruction *OrigInc = dyn_cast<Instruction>( - OrigPhiRef->getIncomingValueForBlock(LatchBlock)); - Instruction *IsomorphicInc = - dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); - - if (OrigInc && IsomorphicInc) { - // If this phi has the same width but is more canonical, replace the - // original with it. As part of the "more canonical" determination, - // respect a prior decision to use an IV chain. - if (OrigPhiRef->getType() == Phi->getType() && - !(ChainedPhis.count(Phi) || - isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) && - (ChainedPhis.count(Phi) || - isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { - std::swap(OrigPhiRef, Phi); - std::swap(OrigInc, IsomorphicInc); - } - // Replacing the congruent phi is sufficient because acyclic - // redundancy elimination, CSE/GVN, should handle the - // rest. However, once SCEV proves that a phi is congruent, - // it's often the head of an IV user cycle that is isomorphic - // with the original phi. It's worth eagerly cleaning up the - // common case of a single IV increment so that DeleteDeadPHIs - // can remove cycles that had postinc uses. 
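The congruence detection driving this loop can be summarized as: key every phi by its SCEV and keep the first phi seen per key as the canonical IV; later phis with the same key are redundant. A small standalone sketch of that bookkeeping, with integers standing in for PHINode pointers and SCEV keys (hypothetical names, not LLVM code):

#include <map>
#include <utility>
#include <vector>

// Standalone sketch of the ExprToIVMap idea. Each phi is an (id, scev-key)
// pair; the first phi per key becomes canonical, later ones are reported as
// (redundant, canonical) replacement pairs.
std::vector<std::pair<int, int>>
findCongruentPhis(const std::vector<std::pair<int, long>> &PhisWithScev) {
  std::map<long, int> ExprToIV;
  std::vector<std::pair<int, int>> Replacements;
  for (const auto &P : PhisWithScev) {
    auto Inserted = ExprToIV.emplace(P.second, P.first);
    if (!Inserted.second) // this SCEV already has a canonical phi
      Replacements.push_back({P.first, Inserted.first->second});
  }
  return Replacements;
}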
- const SCEV *TruncExpr = - SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); - if (OrigInc != IsomorphicInc && - TruncExpr == SE.getSCEV(IsomorphicInc) && - SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && - hoistIVInc(OrigInc, IsomorphicInc)) { - DEBUG_WITH_TYPE(DebugType, - dbgs() << "INDVARS: Eliminated congruent iv.inc: " - << *IsomorphicInc << '\n'); - Value *NewInc = OrigInc; - if (OrigInc->getType() != IsomorphicInc->getType()) { - Instruction *IP = nullptr; - if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) - IP = &*PN->getParent()->getFirstInsertionPt(); - else - IP = OrigInc->getNextNode(); - - IRBuilder<> Builder(IP); - Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); - NewInc = Builder.CreateTruncOrBitCast( - OrigInc, IsomorphicInc->getType(), IVName); - } - IsomorphicInc->replaceAllUsesWith(NewInc); - DeadInsts.emplace_back(IsomorphicInc); - } - } - } - DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " - << *Phi << '\n'); - ++NumElim; - Value *NewIV = OrigPhiRef; - if (OrigPhiRef->getType() != Phi->getType()) { - IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); - Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); - NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); - } - Phi->replaceAllUsesWith(NewIV); - DeadInsts.emplace_back(Phi); - } - return NumElim; -} - -Value *SCEVExpander::getExactExistingExpansion(const SCEV *S, - const Instruction *At, Loop *L) { - Optional<ScalarEvolution::ValueOffsetPair> VO = - getRelatedExistingExpansion(S, At, L); - if (VO && VO.getValue().second == nullptr) - return VO.getValue().first; - return nullptr; -} - -Optional<ScalarEvolution::ValueOffsetPair> -SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, - Loop *L) { - using namespace llvm::PatternMatch; - - SmallVector<BasicBlock *, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Look for suitable value in simple conditions at the loop exits. - for (BasicBlock *BB : ExitingBlocks) { - ICmpInst::Predicate Pred; - Instruction *LHS, *RHS; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - m_BasicBlock(), m_BasicBlock()))) - continue; - - if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) - return ScalarEvolution::ValueOffsetPair(LHS, nullptr); - - if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) - return ScalarEvolution::ValueOffsetPair(RHS, nullptr); - } - - // Use expand's logic which is used for reusing a previous Value in - // ExprValueMap. - ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); - if (VO.first) - return VO; - - // There is potential to make this significantly smarter, but this simple - // heuristic already gets some interesting cases. - - // Can not find suitable value. - return None; -} - -bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction *At, - SmallPtrSetImpl<const SCEV *> &Processed) { - - // If we can find an existing value for this scev available at the point "At" - // then consider the expression cheap. 
- if (At && getRelatedExistingExpansion(S, At, L)) - return false; - - // Zero/One operand expressions - switch (S->getSCEVType()) { - case scUnknown: - case scConstant: - return false; - case scTruncate: - return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), - L, At, Processed); - case scZeroExtend: - return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), - L, At, Processed); - case scSignExtend: - return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), - L, At, Processed); - } - - if (!Processed.insert(S).second) - return false; - - if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { - // If the divisor is a power of two and the SCEV type fits in a native - // integer (and the LHS not expensive), consider the division cheap - // irrespective of whether it occurs in the user code since it can be - // lowered into a right shift. - if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) - if (SC->getAPInt().isPowerOf2()) { - if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed)) - return true; - const DataLayout &DL = - L->getHeader()->getParent()->getParent()->getDataLayout(); - unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); - return DL.isIllegalInteger(Width); - } - - // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or - // HowManyLessThans produced to compute a precise expression, rather than a - // UDiv from the user's code. If we can't find a UDiv in the code with some - // simple searching, assume the former consider UDivExpr expensive to - // compute. - BasicBlock *ExitingBB = L->getExitingBlock(); - if (!ExitingBB) - return true; - - // At the beginning of this function we already tried to find existing value - // for plain 'S'. Now try to lookup 'S + 1' since it is common pattern - // involving division. This is just a simple search heuristic. - if (!At) - At = &ExitingBB->back(); - if (!getRelatedExistingExpansion( - SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) - return true; - } - - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVMinMaxExpr>(S)) - return true; - - // Recurse past nary expressions, which commonly occur in the - // BackedgeTakenCount. They may already exist in program code, and if not, - // they are not too expensive rematerialize. - if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { - for (auto *Op : NAry->operands()) - if (isHighCostExpansionHelper(Op, L, At, Processed)) - return true; - } - - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. 
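One concrete instance of the cost rule above: an unsigned division by a power-of-two constant lowers to a logical right shift, which is why such UDiv SCEVs are treated as cheap whenever the integer type is legal for the target. A small standalone illustration, not tied to LLVM's cost model:

#include <cassert>
#include <cstdint>

// Standalone sketch: X / Divisor for a power-of-two Divisor is a single
// shift, so rematerializing it costs one instruction.
uint64_t udivByPow2(uint64_t X, uint64_t Divisor) {
  assert(Divisor != 0 && (Divisor & (Divisor - 1)) == 0 && "not a power of 2");
  unsigned ShiftAmt = __builtin_ctzll(Divisor); // log2(Divisor)
  return X >> ShiftAmt;                         // equal to X / Divisor
}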
- return false; -} - -Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, - Instruction *IP) { - assert(IP); - switch (Pred->getKind()) { - case SCEVPredicate::P_Union: - return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP); - case SCEVPredicate::P_Equal: - return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP); - case SCEVPredicate::P_Wrap: { - auto *AddRecPred = cast<SCEVWrapPredicate>(Pred); - return expandWrapPredicate(AddRecPred, IP); - } - } - llvm_unreachable("Unknown SCEV predicate type"); -} - -Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, - Instruction *IP) { - Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP); - Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP); - - Builder.SetInsertPoint(IP); - auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); - return I; -} - -Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, - Instruction *Loc, bool Signed) { - assert(AR->isAffine() && "Cannot generate RT check for " - "non-affine expression"); - - SCEVUnionPredicate Pred; - const SCEV *ExitCount = - SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); - - assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count"); - - const SCEV *Step = AR->getStepRecurrence(SE); - const SCEV *Start = AR->getStart(); - - Type *ARTy = AR->getType(); - unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); - unsigned DstBits = SE.getTypeSizeInBits(ARTy); - - // The expression {Start,+,Step} has nusw/nssw if - // Step < 0, Start - |Step| * Backedge <= Start - // Step >= 0, Start + |Step| * Backedge > Start - // and |Step| * Backedge doesn't unsigned overflow. - - IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); - Builder.SetInsertPoint(Loc); - Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc); - - IntegerType *Ty = - IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); - Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty; - - Value *StepValue = expandCodeFor(Step, Ty, Loc); - Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc); - Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc); - - ConstantInt *Zero = - ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); - - Builder.SetInsertPoint(Loc); - // Compute |Step| - Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero); - Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue); - - // Get the backedge taken count and truncate or extended to the AR type. 
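The remainder of generateOverflowCheck below multiplies |Step| by the backedge-taken count via umul.with.overflow and compares Start plus or minus that product against Start. A compact standalone sketch of the whole predicate, using 64-bit integers in place of the expander's IR values and assuming, for simplicity, that the AR type and the trip-count type have the same width (so the extra truncation check is omitted):

#include <cstdint>

// Standalone sketch: returns true when {Start,+,Step} may wrap within BTC
// iterations, i.e. when the generated runtime check would fire.
bool addRecMayWrap(int64_t Start, int64_t Step, uint64_t BTC, bool Signed) {
  uint64_t AbsStep = Step < 0 ? -uint64_t(Step) : uint64_t(Step);
  uint64_t Mul;                                    // |Step| * BTC
  bool OfMul = __builtin_mul_overflow(AbsStep, BTC, &Mul);

  // Two's-complement wrap, mirroring IR add/sub on the AR type.
  int64_t Add = int64_t(uint64_t(Start) + Mul);    // Start + |Step| * BTC
  int64_t Sub = int64_t(uint64_t(Start) - Mul);    // Start - |Step| * BTC

  // Step <  0: wrap iff Start - |Step|*BTC > Start.
  // Step >= 0: wrap iff Start + |Step|*BTC < Start.
  bool EndCheck =
      Step < 0 ? (Signed ? Sub > Start : uint64_t(Sub) > uint64_t(Start))
               : (Signed ? Add < Start : uint64_t(Add) < uint64_t(Start));
  return EndCheck || OfMul;
}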
- Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); - auto *MulF = Intrinsic::getDeclaration(Loc->getModule(), - Intrinsic::umul_with_overflow, Ty); - - // Compute |Step| * Backedge - CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul"); - Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); - Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); - - // Compute: - // Start + |Step| * Backedge < Start - // Start - |Step| * Backedge > Start - Value *Add = nullptr, *Sub = nullptr; - if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) { - const SCEV *MulS = SE.getSCEV(MulV); - const SCEV *NegMulS = SE.getNegativeSCEV(MulS); - Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue), - ARPtrTy); - Sub = Builder.CreateBitCast( - expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy); - } else { - Add = Builder.CreateAdd(StartValue, MulV); - Sub = Builder.CreateSub(StartValue, MulV); - } - - Value *EndCompareGT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); - - Value *EndCompareLT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); - - // Select the answer based on the sign of Step. - Value *EndCheck = - Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); - - // If the backedge taken count type is larger than the AR type, - // check that we don't drop any bits by truncating it. If we are - // dropping bits, then we have overflow (unless the step is zero). - if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { - auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); - auto *BackedgeCheck = - Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal, - ConstantInt::get(Loc->getContext(), MaxVal)); - BackedgeCheck = Builder.CreateAnd( - BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero)); - - EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); - } - - EndCheck = Builder.CreateOr(EndCheck, OfMul); - return EndCheck; -} - -Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, - Instruction *IP) { - const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr()); - Value *NSSWCheck = nullptr, *NUSWCheck = nullptr; - - // Add a check for NUSW - if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW) - NUSWCheck = generateOverflowCheck(A, IP, false); - - // Add a check for NSSW - if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW) - NSSWCheck = generateOverflowCheck(A, IP, true); - - if (NUSWCheck && NSSWCheck) - return Builder.CreateOr(NUSWCheck, NSSWCheck); - - if (NUSWCheck) - return NUSWCheck; - - if (NSSWCheck) - return NSSWCheck; - - return ConstantInt::getFalse(IP->getContext()); -} - -Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, - Instruction *IP) { - auto *BoolType = IntegerType::get(IP->getContext(), 1); - Value *Check = ConstantInt::getNullValue(BoolType); - - // Loop over all checks in this set. - for (auto Pred : Union->getPredicates()) { - auto *NextCheck = expandCodeForPredicate(Pred, IP); - Builder.SetInsertPoint(IP); - Check = Builder.CreateOr(Check, NextCheck); - } - - return Check; -} - -namespace { -// Search for a SCEV subexpression that is not safe to expand. Any expression -// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely -// UDiv expressions. 
We don't know if the UDiv is derived from an IR divide -// instruction, but the important thing is that we prove the denominator is -// nonzero before expansion. -// -// IVUsers already checks that IV-derived expressions are safe. So this check is -// only needed when the expression includes some subexpression that is not IV -// derived. -// -// Currently, we only allow division by a nonzero constant here. If this is -// inadequate, we could easily allow division by SCEVUnknown by using -// ValueTracking to check isKnownNonZero(). -// -// We cannot generally expand recurrences unless the step dominates the loop -// header. The expander handles the special case of affine recurrences by -// scaling the recurrence outside the loop, but this technique isn't generally -// applicable. Expanding a nested recurrence outside a loop requires computing -// binomial coefficients. This could be done, but the recurrence has to be in a -// perfectly reduced form, which can't be guaranteed. -struct SCEVFindUnsafe { - ScalarEvolution &SE; - bool IsUnsafe; - - SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} - - bool follow(const SCEV *S) { - if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); - if (!SC || SC->getValue()->isZero()) { - IsUnsafe = true; - return false; - } - } - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV *Step = AR->getStepRecurrence(SE); - if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { - IsUnsafe = true; - return false; - } - } - return true; - } - bool isDone() const { return IsUnsafe; } -}; -} - -namespace llvm { -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { - SCEVFindUnsafe Search(SE); - visitAll(S, Search); - return !Search.IsUnsafe; -} - -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE) { - if (!isSafeToExpand(S, SE)) - return false; - // We have to prove that the expanded site of S dominates InsertionPoint. - // This is easy when not in the same block, but hard when S is an instruction - // to be expanded somewhere inside the same block as our insertion point. - // What we really need here is something analogous to an OrderedBasicBlock, - // but for the moment, we paper over the problem by handling two common and - // cheap to check cases. - if (SE.properlyDominates(S, InsertionPoint->getParent())) - return true; - if (SE.dominates(S, InsertionPoint->getParent())) { - if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) - return true; - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) - for (const Value *V : InsertionPoint->operand_values()) - if (V == U->getValue()) - return true; - } - return false; -} -} diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp new file mode 100644 index 0000000000000..9727b7a33d1fb --- /dev/null +++ b/llvm/lib/Analysis/StackLifetime.cpp @@ -0,0 +1,373 @@ +//===- StackLifetime.cpp - Alloca Lifetime Analysis -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/StackLifetime.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include <algorithm> +#include <memory> +#include <tuple> + +using namespace llvm; + +#define DEBUG_TYPE "stack-lifetime" + +const StackLifetime::LiveRange & +StackLifetime::getLiveRange(const AllocaInst *AI) const { + const auto IT = AllocaNumbering.find(AI); + assert(IT != AllocaNumbering.end()); + return LiveRanges[IT->second]; +} + +bool StackLifetime::isReachable(const Instruction *I) const { + return BlockInstRange.find(I->getParent()) != BlockInstRange.end(); +} + +bool StackLifetime::isAliveAfter(const AllocaInst *AI, + const Instruction *I) const { + const BasicBlock *BB = I->getParent(); + auto ItBB = BlockInstRange.find(BB); + assert(ItBB != BlockInstRange.end() && "Unreachable is not expected"); + + // Search the block for the first instruction following 'I'. + auto It = std::upper_bound(Instructions.begin() + ItBB->getSecond().first + 1, + Instructions.begin() + ItBB->getSecond().second, I, + [](const Instruction *L, const Instruction *R) { + return L->comesBefore(R); + }); + --It; + unsigned InstNum = It - Instructions.begin(); + return getLiveRange(AI).test(InstNum); +} + +static bool readMarker(const Instruction *I, bool *IsStart) { + if (!I->isLifetimeStartOrEnd()) + return false; + + auto *II = cast<IntrinsicInst>(I); + *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; + return true; +} + +void StackLifetime::collectMarkers() { + InterestingAllocas.resize(NumAllocas); + DenseMap<const BasicBlock *, SmallDenseMap<const IntrinsicInst *, Marker>> + BBMarkerSet; + + // Compute the set of start/end markers per basic block. + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { + const AllocaInst *AI = Allocas[AllocaNo]; + SmallVector<const Instruction *, 8> WorkList; + WorkList.push_back(AI); + while (!WorkList.empty()) { + const Instruction *I = WorkList.pop_back_val(); + for (const User *U : I->users()) { + if (auto *BI = dyn_cast<BitCastInst>(U)) { + WorkList.push_back(BI); + continue; + } + auto *UI = dyn_cast<IntrinsicInst>(U); + if (!UI) + continue; + bool IsStart; + if (!readMarker(UI, &IsStart)) + continue; + if (IsStart) + InterestingAllocas.set(AllocaNo); + BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart}; + } + } + } + + // Compute instruction numbering. 
Only the following instructions are + // considered: + // * Basic block entries + // * Lifetime markers + // For each basic block, compute + // * the list of markers in the instruction order + // * the sets of allocas whose lifetime starts or ends in this BB + LLVM_DEBUG(dbgs() << "Instructions:\n"); + for (const BasicBlock *BB : depth_first(&F)) { + LLVM_DEBUG(dbgs() << " " << Instructions.size() << ": BB " << BB->getName() + << "\n"); + auto BBStart = Instructions.size(); + Instructions.push_back(nullptr); + + BlockLifetimeInfo &BlockInfo = + BlockLiveness.try_emplace(BB, NumAllocas).first->getSecond(); + + auto &BlockMarkerSet = BBMarkerSet[BB]; + if (BlockMarkerSet.empty()) { + BlockInstRange[BB] = std::make_pair(BBStart, Instructions.size()); + continue; + } + + auto ProcessMarker = [&](const IntrinsicInst *I, const Marker &M) { + LLVM_DEBUG(dbgs() << " " << Instructions.size() << ": " + << (M.IsStart ? "start " : "end ") << M.AllocaNo + << ", " << *I << "\n"); + + BBMarkers[BB].push_back({Instructions.size(), M}); + Instructions.push_back(I); + + if (M.IsStart) { + BlockInfo.End.reset(M.AllocaNo); + BlockInfo.Begin.set(M.AllocaNo); + } else { + BlockInfo.Begin.reset(M.AllocaNo); + BlockInfo.End.set(M.AllocaNo); + } + }; + + if (BlockMarkerSet.size() == 1) { + ProcessMarker(BlockMarkerSet.begin()->getFirst(), + BlockMarkerSet.begin()->getSecond()); + } else { + // Scan the BB to determine the marker order. + for (const Instruction &I : *BB) { + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); + if (!II) + continue; + auto It = BlockMarkerSet.find(II); + if (It == BlockMarkerSet.end()) + continue; + ProcessMarker(II, It->getSecond()); + } + } + + BlockInstRange[BB] = std::make_pair(BBStart, Instructions.size()); + } +} + +void StackLifetime::calculateLocalLiveness() { + bool Changed = true; + while (Changed) { + Changed = false; + + for (const BasicBlock *BB : depth_first(&F)) { + BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond(); + + // Compute LiveIn by unioning together the LiveOut sets of all preds. + BitVector LocalLiveIn; + for (auto *PredBB : predecessors(BB)) { + LivenessMap::const_iterator I = BlockLiveness.find(PredBB); + // If a predecessor is unreachable, ignore it. + if (I == BlockLiveness.end()) + continue; + switch (Type) { + case LivenessType::May: + LocalLiveIn |= I->second.LiveOut; + break; + case LivenessType::Must: + if (LocalLiveIn.empty()) + LocalLiveIn = I->second.LiveOut; + else + LocalLiveIn &= I->second.LiveOut; + break; + } + } + + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; + LocalLiveOut.reset(BlockInfo.End); + LocalLiveOut |= BlockInfo.Begin; + + // Update block LiveIn set, noting whether it has changed. + if (LocalLiveIn.test(BlockInfo.LiveIn)) { + BlockInfo.LiveIn |= LocalLiveIn; + } + + // Update block LiveOut set, noting whether it has changed. + if (LocalLiveOut.test(BlockInfo.LiveOut)) { + Changed = true; + BlockInfo.LiveOut |= LocalLiveOut; + } + } + } // while changed. 
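A standalone sketch of the per-block step in calculateLocalLiveness above: live-in is the union (may-liveness) or intersection (must-liveness) of the predecessors' live-out sets, and live-out clears allocas whose lifetime ends in the block and sets those whose lifetime begins there. std::bitset stands in for llvm::BitVector, and the capacity of 64 allocas is an arbitrary choice for the example:

#include <bitset>
#include <cstddef>
#include <vector>

using AllocaSet = std::bitset<64>;

// Meet over predecessors: union for "may" liveness, intersection for "must".
AllocaSet meetLiveIn(const std::vector<AllocaSet> &PredLiveOut, bool Must) {
  if (PredLiveOut.empty())
    return AllocaSet(); // entry block or all predecessors unreachable
  AllocaSet LiveIn = PredLiveOut.front();
  for (std::size_t I = 1; I < PredLiveOut.size(); ++I)
    LiveIn = Must ? (LiveIn & PredLiveOut[I]) : (LiveIn | PredLiveOut[I]);
  return LiveIn;
}

// Transfer function: lifetime.end markers kill, lifetime.start markers gen.
AllocaSet transferLiveOut(const AllocaSet &LiveIn, const AllocaSet &Begin,
                          const AllocaSet &End) {
  return (LiveIn & ~End) | Begin;
}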
+} + +void StackLifetime::calculateLiveIntervals() { + for (auto IT : BlockLiveness) { + const BasicBlock *BB = IT.getFirst(); + BlockLifetimeInfo &BlockInfo = IT.getSecond(); + unsigned BBStart, BBEnd; + std::tie(BBStart, BBEnd) = BlockInstRange[BB]; + + BitVector Started, Ended; + Started.resize(NumAllocas); + Ended.resize(NumAllocas); + SmallVector<unsigned, 8> Start; + Start.resize(NumAllocas); + + // LiveIn ranges start at the first instruction. + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { + if (BlockInfo.LiveIn.test(AllocaNo)) { + Started.set(AllocaNo); + Start[AllocaNo] = BBStart; + } + } + + for (auto &It : BBMarkers[BB]) { + unsigned InstNo = It.first; + bool IsStart = It.second.IsStart; + unsigned AllocaNo = It.second.AllocaNo; + + if (IsStart) { + assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart); + if (!Started.test(AllocaNo)) { + Started.set(AllocaNo); + Ended.reset(AllocaNo); + Start[AllocaNo] = InstNo; + } + } else { + assert(!Ended.test(AllocaNo)); + if (Started.test(AllocaNo)) { + LiveRanges[AllocaNo].addRange(Start[AllocaNo], InstNo); + Started.reset(AllocaNo); + } + Ended.set(AllocaNo); + } + } + + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) + if (Started.test(AllocaNo)) + LiveRanges[AllocaNo].addRange(Start[AllocaNo], BBEnd); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void StackLifetime::dumpAllocas() const { + dbgs() << "Allocas:\n"; + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) + dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n"; +} + +LLVM_DUMP_METHOD void StackLifetime::dumpBlockLiveness() const { + dbgs() << "Block liveness:\n"; + for (auto IT : BlockLiveness) { + const BasicBlock *BB = IT.getFirst(); + const BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond(); + auto BlockRange = BlockInstRange.find(BB)->getSecond(); + dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second + << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End + << ", livein " << BlockInfo.LiveIn << ", liveout " + << BlockInfo.LiveOut << "\n"; + } +} + +LLVM_DUMP_METHOD void StackLifetime::dumpLiveRanges() const { + dbgs() << "Alloca liveness:\n"; + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) + dbgs() << " " << AllocaNo << ": " << LiveRanges[AllocaNo] << "\n"; +} +#endif + +StackLifetime::StackLifetime(const Function &F, + ArrayRef<const AllocaInst *> Allocas, + LivenessType Type) + : F(F), Type(Type), Allocas(Allocas), NumAllocas(Allocas.size()) { + LLVM_DEBUG(dumpAllocas()); + + for (unsigned I = 0; I < NumAllocas; ++I) + AllocaNumbering[Allocas[I]] = I; + + collectMarkers(); +} + +void StackLifetime::run() { + LiveRanges.resize(NumAllocas, LiveRange(Instructions.size())); + for (unsigned I = 0; I < NumAllocas; ++I) + if (!InterestingAllocas.test(I)) + LiveRanges[I] = getFullLiveRange(); + + calculateLocalLiveness(); + LLVM_DEBUG(dumpBlockLiveness()); + calculateLiveIntervals(); + LLVM_DEBUG(dumpLiveRanges()); +} + +class StackLifetime::LifetimeAnnotationWriter + : public AssemblyAnnotationWriter { + const StackLifetime &SL; + + void printInstrAlive(unsigned InstrNo, formatted_raw_ostream &OS) { + SmallVector<StringRef, 16> Names; + for (const auto &KV : SL.AllocaNumbering) { + if (SL.LiveRanges[KV.getSecond()].test(InstrNo)) + Names.push_back(KV.getFirst()->getName()); + } + llvm::sort(Names); + OS << " ; Alive: <" << llvm::join(Names, " ") << ">\n"; + } + + void emitBasicBlockStartAnnot(const BasicBlock *BB, + 
formatted_raw_ostream &OS) override { + auto ItBB = SL.BlockInstRange.find(BB); + if (ItBB == SL.BlockInstRange.end()) + return; // Unreachable. + printInstrAlive(ItBB->getSecond().first, OS); + } + + void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + const Instruction *Instr = dyn_cast<Instruction>(&V); + if (!Instr || !SL.isReachable(Instr)) + return; + + SmallVector<StringRef, 16> Names; + for (const auto &KV : SL.AllocaNumbering) { + if (SL.isAliveAfter(KV.getFirst(), Instr)) + Names.push_back(KV.getFirst()->getName()); + } + llvm::sort(Names); + OS << "\n ; Alive: <" << llvm::join(Names, " ") << ">\n"; + } + +public: + LifetimeAnnotationWriter(const StackLifetime &SL) : SL(SL) {} +}; + +void StackLifetime::print(raw_ostream &OS) { + LifetimeAnnotationWriter AAW(*this); + F.print(OS, &AAW); +} + +PreservedAnalyses StackLifetimePrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + SmallVector<const AllocaInst *, 8> Allocas; + for (auto &I : instructions(F)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) + Allocas.push_back(AI); + StackLifetime SL(F, Allocas, Type); + SL.run(); + SL.print(OS); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 7f5bedabbd80b..bbfc303aefac4 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -9,56 +9,49 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/CallSite.h" +#include "llvm/Analysis/StackLifetime.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <memory> using namespace llvm; #define DEBUG_TYPE "stack-safety" +STATISTIC(NumAllocaStackSafe, "Number of safe allocas"); +STATISTIC(NumAllocaTotal, "Number of total allocas"); + static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations", cl::init(20), cl::Hidden); -namespace { - -/// Rewrite an SCEV expression for a memory access address to an expression that -/// represents offset from the given alloca. -class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> { - const Value *AllocaPtr; +static cl::opt<bool> StackSafetyPrint("stack-safety-print", cl::init(false), + cl::Hidden); -public: - AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) - : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} - - const SCEV *visit(const SCEV *Expr) { - // Only re-write the expression if the alloca is used in an addition - // expression (it can be used in other types of expressions if it's cast to - // an int and passed as an argument.) 
- if (!isa<SCEVAddRecExpr>(Expr) && !isa<SCEVAddExpr>(Expr) && - !isa<SCEVUnknown>(Expr)) - return Expr; - return SCEVRewriteVisitor<AllocaOffsetRewriter>::visit(Expr); - } +static cl::opt<bool> StackSafetyRun("stack-safety-run", cl::init(false), + cl::Hidden); - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - // FIXME: look through one or several levels of definitions? - // This can be inttoptr(AllocaPtr) and SCEV would not unwrap - // it for us. - if (Expr->getValue() == AllocaPtr) - return SE.getZero(Expr->getType()); - return Expr; - } -}; +namespace { /// Describes use of address in as a function call argument. -struct PassAsArgInfo { +template <typename CalleeTy> struct CallInfo { /// Function being called. - const GlobalValue *Callee = nullptr; + const CalleeTy *Callee = nullptr; /// Index of argument which pass address. size_t ParamNo = 0; // Offset range of address from base address (alloca or calling function @@ -66,234 +59,262 @@ struct PassAsArgInfo { // Range should never set to empty-set, that is an invalid access range // that can cause empty-set to be propagated with ConstantRange::add ConstantRange Offset; - PassAsArgInfo(const GlobalValue *Callee, size_t ParamNo, ConstantRange Offset) + CallInfo(const CalleeTy *Callee, size_t ParamNo, const ConstantRange &Offset) : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {} - - StringRef getName() const { return Callee->getName(); } }; -raw_ostream &operator<<(raw_ostream &OS, const PassAsArgInfo &P) { - return OS << "@" << P.getName() << "(arg" << P.ParamNo << ", " << P.Offset - << ")"; +template <typename CalleeTy> +raw_ostream &operator<<(raw_ostream &OS, const CallInfo<CalleeTy> &P) { + return OS << "@" << P.Callee->getName() << "(arg" << P.ParamNo << ", " + << P.Offset << ")"; } /// Describe uses of address (alloca or parameter) inside of the function. -struct UseInfo { +template <typename CalleeTy> struct UseInfo { // Access range if the address (alloca or parameters). // It is allowed to be empty-set when there are no known accesses. ConstantRange Range; // List of calls which pass address as an argument. - SmallVector<PassAsArgInfo, 4> Calls; + SmallVector<CallInfo<CalleeTy>, 4> Calls; - explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} + UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} - void updateRange(ConstantRange R) { Range = Range.unionWith(R); } + void updateRange(const ConstantRange &R) { + assert(!R.isUpperSignWrapped()); + Range = Range.unionWith(R); + assert(!Range.isUpperSignWrapped()); + } }; -raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) { +template <typename CalleeTy> +raw_ostream &operator<<(raw_ostream &OS, const UseInfo<CalleeTy> &U) { OS << U.Range; for (auto &Call : U.Calls) OS << ", " << Call; return OS; } -struct AllocaInfo { - const AllocaInst *AI = nullptr; - uint64_t Size = 0; - UseInfo Use; - - AllocaInfo(unsigned PointerSize, const AllocaInst *AI, uint64_t Size) - : AI(AI), Size(Size), Use(PointerSize) {} - - StringRef getName() const { return AI->getName(); } -}; - -raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) { - return OS << A.getName() << "[" << A.Size << "]: " << A.Use; +// Check if we should bailout for such ranges. 
+bool isUnsafe(const ConstantRange &R) { + return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped(); } -struct ParamInfo { - const Argument *Arg = nullptr; - UseInfo Use; - - explicit ParamInfo(unsigned PointerSize, const Argument *Arg) - : Arg(Arg), Use(PointerSize) {} - - StringRef getName() const { return Arg ? Arg->getName() : "<N/A>"; } -}; - -raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) { - return OS << P.getName() << "[]: " << P.Use; +ConstantRange addOverflowNever(const ConstantRange &L, const ConstantRange &R) { + if (L.signedAddMayOverflow(R) != + ConstantRange::OverflowResult::NeverOverflows) + return ConstantRange(L.getBitWidth(), true); + return L.add(R); } -/// Calculate the allocation size of a given alloca. Returns 0 if the -/// size can not be statically determined. -uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { - const DataLayout &DL = AI->getModule()->getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); - if (AI->isArrayAllocation()) { - auto C = dyn_cast<ConstantInt>(AI->getArraySize()); +/// Calculate the allocation size of a given alloca. Returns empty range +// in case of confution. +ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) { + const DataLayout &DL = AI.getModule()->getDataLayout(); + TypeSize TS = DL.getTypeAllocSize(AI.getAllocatedType()); + unsigned PointerSize = DL.getMaxPointerSizeInBits(); + // Fallback to empty range for alloca size. + ConstantRange R = ConstantRange::getEmpty(PointerSize); + if (TS.isScalable()) + return R; + APInt APSize(PointerSize, TS.getFixedSize(), true); + if (APSize.isNonPositive()) + return R; + if (AI.isArrayAllocation()) { + const auto *C = dyn_cast<ConstantInt>(AI.getArraySize()); if (!C) - return 0; - Size *= C->getZExtValue(); + return R; + bool Overflow = false; + APInt Mul = C->getValue(); + if (Mul.isNonPositive()) + return R; + Mul = Mul.sextOrTrunc(PointerSize); + APSize = APSize.smul_ov(Mul, Overflow); + if (Overflow) + return R; } - return Size; + R = ConstantRange(APInt::getNullValue(PointerSize), APSize); + assert(!isUnsafe(R)); + return R; } -} // end anonymous namespace - -/// Describes uses of allocas and parameters inside of a single function. -struct StackSafetyInfo::FunctionInfo { - // May be a Function or a GlobalAlias - const GlobalValue *GV = nullptr; - // Informations about allocas uses. - SmallVector<AllocaInfo, 4> Allocas; - // Informations about parameters uses. - SmallVector<ParamInfo, 4> Params; +template <typename CalleeTy> struct FunctionInfo { + std::map<const AllocaInst *, UseInfo<CalleeTy>> Allocas; + std::map<uint32_t, UseInfo<CalleeTy>> Params; // TODO: describe return value as depending on one or more of its arguments. // StackSafetyDataFlowAnalysis counter stored here for faster access. int UpdateCount = 0; - FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {} - - explicit FunctionInfo(const Function *F) : GV(F){}; - // Creates FunctionInfo that forwards all the parameters to the aliasee. - explicit FunctionInfo(const GlobalAlias *A); - - FunctionInfo(FunctionInfo &&) = default; - - bool IsDSOLocal() const { return GV->isDSOLocal(); }; - - bool IsInterposable() const { return GV->isInterposable(); }; - - StringRef getName() const { return GV->getName(); } - - void print(raw_ostream &O) const { + void print(raw_ostream &O, StringRef Name, const Function *F) const { // TODO: Consider different printout format after // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then. 
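For getStaticAllocaSizeRange above, the essential arithmetic is: alloca bytes = alloc size of the element type times the constant array count, giving up on scalable types, non-positive values, non-constant counts, or multiplication overflow. A standalone sketch with plain integers in place of TypeSize/APInt; the empty optional plays the role of the empty ConstantRange:

#include <cstdint>
#include <optional>

// Standalone sketch, not LLVM code. Callers pass ArrayCount = 1 for a plain
// (non-array) alloca and std::nullopt when the count is not a compile-time
// constant. Returns the total byte size, or std::nullopt when no static
// bound exists.
std::optional<int64_t> staticAllocaBytes(int64_t ElemBytes, bool Scalable,
                                         std::optional<int64_t> ArrayCount) {
  if (Scalable || ElemBytes <= 0 || !ArrayCount || *ArrayCount <= 0)
    return std::nullopt;
  int64_t Size;
  if (__builtin_mul_overflow(ElemBytes, *ArrayCount, &Size))
    return std::nullopt; // multiplication overflows: give up
  return Size;
}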
- O << " @" << getName() << (IsDSOLocal() ? "" : " dso_preemptable") - << (IsInterposable() ? " interposable" : "") << "\n"; + O << " @" << Name << ((F && F->isDSOLocal()) ? "" : " dso_preemptable") + << ((F && F->isInterposable()) ? " interposable" : "") << "\n"; + O << " args uses:\n"; - for (auto &P : Params) - O << " " << P << "\n"; + for (auto &KV : Params) { + O << " "; + if (F) + O << F->getArg(KV.first)->getName(); + else + O << formatv("arg{0}", KV.first); + O << "[]: " << KV.second << "\n"; + } + O << " allocas uses:\n"; - for (auto &AS : Allocas) - O << " " << AS << "\n"; + if (F) { + for (auto &I : instructions(F)) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + auto &AS = Allocas.find(AI)->second; + O << " " << AI->getName() << "[" + << getStaticAllocaSizeRange(*AI).getUpper() << "]: " << AS << "\n"; + } + } + } else { + assert(Allocas.empty()); + } } +}; + +using GVToSSI = std::map<const GlobalValue *, FunctionInfo<GlobalValue>>; -private: - FunctionInfo(const FunctionInfo &) = default; +} // namespace + +struct StackSafetyInfo::InfoTy { + FunctionInfo<GlobalValue> Info; }; -StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { - unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits(); - const GlobalObject *Aliasee = A->getBaseObject(); - const FunctionType *Type = cast<FunctionType>(Aliasee->getValueType()); - // 'Forward' all parameters to this alias to the aliasee - for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) { - Params.emplace_back(PointerSize, nullptr); - UseInfo &US = Params.back().Use; - US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0))); - } -} +struct StackSafetyGlobalInfo::InfoTy { + GVToSSI Info; + SmallPtrSet<const AllocaInst *, 8> SafeAllocas; +}; namespace { class StackSafetyLocalAnalysis { - const Function &F; + Function &F; const DataLayout &DL; ScalarEvolution &SE; unsigned PointerSize = 0; const ConstantRange UnknownRange; - ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr); - ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr, - uint64_t AccessSize); + ConstantRange offsetFrom(Value *Addr, Value *Base); + ConstantRange getAccessRange(Value *Addr, Value *Base, + const ConstantRange &SizeRange); + ConstantRange getAccessRange(Value *Addr, Value *Base, TypeSize Size); ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U, - const Value *AllocaPtr); + Value *Base); - bool analyzeAllUses(const Value *Ptr, UseInfo &AS); - - ConstantRange getRange(uint64_t Lower, uint64_t Upper) const { - return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper)); - } + bool analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS, + const StackLifetime &SL); public: - StackSafetyLocalAnalysis(const Function &F, ScalarEvolution &SE) + StackSafetyLocalAnalysis(Function &F, ScalarEvolution &SE) : F(F), DL(F.getParent()->getDataLayout()), SE(SE), PointerSize(DL.getPointerSizeInBits()), UnknownRange(PointerSize, true) {} // Run the transformation on the associated function. 
- StackSafetyInfo run(); + FunctionInfo<GlobalValue> run(); }; -ConstantRange -StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr, - const Value *AllocaPtr) { - if (!SE.isSCEVable(Addr->getType())) +ConstantRange StackSafetyLocalAnalysis::offsetFrom(Value *Addr, Value *Base) { + if (!SE.isSCEVable(Addr->getType()) || !SE.isSCEVable(Base->getType())) return UnknownRange; - AllocaOffsetRewriter Rewriter(SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); - ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); - assert(!Offset.isEmptySet()); - return Offset; + auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext()); + const SCEV *AddrExp = SE.getTruncateOrZeroExtend(SE.getSCEV(Addr), PtrTy); + const SCEV *BaseExp = SE.getTruncateOrZeroExtend(SE.getSCEV(Base), PtrTy); + const SCEV *Diff = SE.getMinusSCEV(AddrExp, BaseExp); + + ConstantRange Offset = SE.getSignedRange(Diff); + if (isUnsafe(Offset)) + return UnknownRange; + return Offset.sextOrTrunc(PointerSize); } -ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, - const Value *AllocaPtr, - uint64_t AccessSize) { - if (!SE.isSCEVable(Addr->getType())) +ConstantRange +StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, + const ConstantRange &SizeRange) { + // Zero-size loads and stores do not access memory. + if (SizeRange.isEmptySet()) + return ConstantRange::getEmpty(PointerSize); + assert(!isUnsafe(SizeRange)); + + ConstantRange Offsets = offsetFrom(Addr, Base); + if (isUnsafe(Offsets)) return UnknownRange; - AllocaOffsetRewriter Rewriter(SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); + Offsets = addOverflowNever(Offsets, SizeRange); + if (isUnsafe(Offsets)) + return UnknownRange; + return Offsets; +} - ConstantRange AccessStartRange = - SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); - ConstantRange SizeRange = getRange(0, AccessSize); - ConstantRange AccessRange = AccessStartRange.add(SizeRange); - assert(!AccessRange.isEmptySet()); - return AccessRange; +ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, + TypeSize Size) { + if (Size.isScalable()) + return UnknownRange; + APInt APSize(PointerSize, Size.getFixedSize(), true); + if (APSize.isNegative()) + return UnknownRange; + return getAccessRange( + Addr, Base, ConstantRange(APInt::getNullValue(PointerSize), APSize)); } ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( - const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) { - if (auto MTI = dyn_cast<MemTransferInst>(MI)) { + const MemIntrinsic *MI, const Use &U, Value *Base) { + if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) { if (MTI->getRawSource() != U && MTI->getRawDest() != U) - return getRange(0, 1); + return ConstantRange::getEmpty(PointerSize); } else { if (MI->getRawDest() != U) - return getRange(0, 1); + return ConstantRange::getEmpty(PointerSize); } - const auto *Len = dyn_cast<ConstantInt>(MI->getLength()); - // Non-constant size => unsafe. FIXME: try SCEV getRange. 
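Tying the helpers in this hunk together: getAccessRange turns each access into a byte interval relative to the base pointer (the offset of the address from the base, widened by the access size), and an alloca is later considered stack safe only if the union of those intervals stays inside [0, alloca size). A standalone sketch with plain half-open intervals in place of ConstantRange; any potential wrap is treated as unknown, in the spirit of addOverflowNever:

#include <cstdint>
#include <optional>

// Standalone sketch, not LLVM code.
struct ByteRange {
  int64_t Lo, Hi; // half-open [Lo, Hi), offsets in bytes from the base
};

// An access whose start offset lies in [OffLo, OffHi] and spans Size bytes
// touches [OffLo, OffHi + Size). std::nullopt models the "unknown" range.
std::optional<ByteRange> accessRange(int64_t OffLo, int64_t OffHi,
                                     int64_t Size) {
  if (Size < 0 || OffLo > OffHi)
    return std::nullopt;
  int64_t Hi;
  if (__builtin_add_overflow(OffHi, Size, &Hi))
    return std::nullopt; // possible wrap: give up
  return ByteRange{OffLo, Hi};
}

// The eventual safety test: every touched byte lies inside the alloca.
bool accessIsSafe(const ByteRange &R, int64_t AllocaBytes) {
  return R.Lo >= 0 && R.Hi <= AllocaBytes;
}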
- if (!Len) + + auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize); + if (!SE.isSCEVable(MI->getLength()->getType())) return UnknownRange; - ConstantRange AccessRange = getAccessRange(U, AllocaPtr, Len->getZExtValue()); - return AccessRange; + + const SCEV *Expr = + SE.getTruncateOrZeroExtend(SE.getSCEV(MI->getLength()), CalculationTy); + ConstantRange Sizes = SE.getSignedRange(Expr); + if (Sizes.getUpper().isNegative() || isUnsafe(Sizes)) + return UnknownRange; + Sizes = Sizes.sextOrTrunc(PointerSize); + ConstantRange SizeRange(APInt::getNullValue(PointerSize), + Sizes.getUpper() - 1); + return getAccessRange(U, Base, SizeRange); } /// The function analyzes all local uses of Ptr (alloca or argument) and /// calculates local access range and all function calls where it was used. -bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { +bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, + UseInfo<GlobalValue> &US, + const StackLifetime &SL) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 8> WorkList; WorkList.push_back(Ptr); + const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr); // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. while (!WorkList.empty()) { const Value *V = WorkList.pop_back_val(); for (const Use &UI : V->uses()) { - auto I = cast<const Instruction>(UI.getUser()); + const auto *I = cast<Instruction>(UI.getUser()); + if (!SL.isReachable(I)) + continue; + assert(V == UI.get()); switch (I->getOpcode()) { case Instruction::Load: { + if (AI && !SL.isAliveAfter(AI, I)) { + US.updateRange(UnknownRange); + return false; + } US.updateRange( getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType()))); break; @@ -308,6 +329,10 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { US.updateRange(UnknownRange); return false; } + if (AI && !SL.isAliveAfter(AI, I)) { + US.updateRange(UnknownRange); + return false; + } US.updateRange(getAccessRange( UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType()))); break; @@ -322,36 +347,44 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { case Instruction::Call: case Instruction::Invoke: { - ImmutableCallSite CS(I); - if (I->isLifetimeStartOrEnd()) break; + if (AI && !SL.isAliveAfter(AI, I)) { + US.updateRange(UnknownRange); + return false; + } + if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr)); break; } + const auto &CB = cast<CallBase>(*I); + if (!CB.isArgOperand(&UI)) { + US.updateRange(UnknownRange); + return false; + } + + unsigned ArgNo = CB.getArgOperandNo(&UI); + if (CB.isByValArgument(ArgNo)) { + US.updateRange(getAccessRange( + UI, Ptr, DL.getTypeStoreSize(CB.getParamByValType(ArgNo)))); + break; + } + // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
const GlobalValue *Callee = - dyn_cast<GlobalValue>(CS.getCalledValue()->stripPointerCasts()); + dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; } assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee)); - - ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) { - if (A->get() == V) { - ConstantRange Offset = offsetFromAlloca(UI, Ptr); - US.Calls.emplace_back(Callee, A - B, Offset); - } - } - + US.Calls.emplace_back(Callee, ArgNo, offsetFrom(UI, Ptr)); break; } @@ -365,51 +398,52 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { return true; } -StackSafetyInfo StackSafetyLocalAnalysis::run() { - StackSafetyInfo::FunctionInfo Info(&F); +FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() { + FunctionInfo<GlobalValue> Info; assert(!F.isDeclaration() && "Can't run StackSafety on a function declaration"); LLVM_DEBUG(dbgs() << "[StackSafety] " << F.getName() << "\n"); - for (auto &I : instructions(F)) { - if (auto AI = dyn_cast<AllocaInst>(&I)) { - Info.Allocas.emplace_back(PointerSize, AI, - getStaticAllocaAllocationSize(AI)); - AllocaInfo &AS = Info.Allocas.back(); - analyzeAllUses(AI, AS.Use); - } + SmallVector<AllocaInst *, 64> Allocas; + for (auto &I : instructions(F)) + if (auto *AI = dyn_cast<AllocaInst>(&I)) + Allocas.push_back(AI); + StackLifetime SL(F, Allocas, StackLifetime::LivenessType::Must); + SL.run(); + + for (auto *AI : Allocas) { + auto &UI = Info.Allocas.emplace(AI, PointerSize).first->second; + analyzeAllUses(AI, UI, SL); } - for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) { - Info.Params.emplace_back(PointerSize, &A); - ParamInfo &PS = Info.Params.back(); - analyzeAllUses(&A, PS.Use); + for (Argument &A : make_range(F.arg_begin(), F.arg_end())) { + // Non pointers and bypass arguments are not going to be used in any global + // processing. + if (A.getType()->isPointerTy() && !A.hasByValAttr()) { + auto &UI = Info.Params.emplace(A.getArgNo(), PointerSize).first->second; + analyzeAllUses(&A, UI, SL); + } } + LLVM_DEBUG(Info.print(dbgs(), F.getName(), &F)); LLVM_DEBUG(dbgs() << "[StackSafety] done\n"); - LLVM_DEBUG(Info.print(dbgs())); - return StackSafetyInfo(std::move(Info)); + return Info; } -class StackSafetyDataFlowAnalysis { - using FunctionMap = - std::map<const GlobalValue *, StackSafetyInfo::FunctionInfo>; +template <typename CalleeTy> class StackSafetyDataFlowAnalysis { + using FunctionMap = std::map<const CalleeTy *, FunctionInfo<CalleeTy>>; FunctionMap Functions; - // Callee-to-Caller multimap. - DenseMap<const GlobalValue *, SmallVector<const GlobalValue *, 4>> Callers; - SetVector<const GlobalValue *> WorkList; - - unsigned PointerSize = 0; const ConstantRange UnknownRange; - ConstantRange getArgumentAccessRange(const GlobalValue *Callee, - unsigned ParamNo) const; - bool updateOneUse(UseInfo &US, bool UpdateToFullSet); - void updateOneNode(const GlobalValue *Callee, - StackSafetyInfo::FunctionInfo &FS); - void updateOneNode(const GlobalValue *Callee) { + // Callee-to-Caller multimap. 
+ DenseMap<const CalleeTy *, SmallVector<const CalleeTy *, 4>> Callers; + SetVector<const CalleeTy *> WorkList; + + bool updateOneUse(UseInfo<CalleeTy> &US, bool UpdateToFullSet); + void updateOneNode(const CalleeTy *Callee, FunctionInfo<CalleeTy> &FS); + void updateOneNode(const CalleeTy *Callee) { updateOneNode(Callee, Functions.find(Callee)->second); } void updateAllNodes() { @@ -422,51 +456,46 @@ class StackSafetyDataFlowAnalysis { #endif public: - StackSafetyDataFlowAnalysis( - Module &M, std::function<const StackSafetyInfo &(Function &)> FI); - StackSafetyGlobalInfo run(); -}; + StackSafetyDataFlowAnalysis(uint32_t PointerBitWidth, FunctionMap Functions) + : Functions(std::move(Functions)), + UnknownRange(ConstantRange::getFull(PointerBitWidth)) {} -StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis( - Module &M, std::function<const StackSafetyInfo &(Function &)> FI) - : PointerSize(M.getDataLayout().getPointerSizeInBits()), - UnknownRange(PointerSize, true) { - // Without ThinLTO, run the local analysis for every function in the TU and - // then run the DFA. - for (auto &F : M.functions()) - if (!F.isDeclaration()) - Functions.emplace(&F, FI(F)); - for (auto &A : M.aliases()) - if (isa<Function>(A.getBaseObject())) - Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A)); -} + const FunctionMap &run(); -ConstantRange -StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee, - unsigned ParamNo) const { - auto IT = Functions.find(Callee); + ConstantRange getArgumentAccessRange(const CalleeTy *Callee, unsigned ParamNo, + const ConstantRange &Offsets) const; +}; + +template <typename CalleeTy> +ConstantRange StackSafetyDataFlowAnalysis<CalleeTy>::getArgumentAccessRange( + const CalleeTy *Callee, unsigned ParamNo, + const ConstantRange &Offsets) const { + auto FnIt = Functions.find(Callee); // Unknown callee (outside of LTO domain or an indirect call). - if (IT == Functions.end()) + if (FnIt == Functions.end()) return UnknownRange; - const StackSafetyInfo::FunctionInfo &FS = IT->second; - // The definition of this symbol may not be the definition in this linkage - // unit. 
- if (!FS.IsDSOLocal() || FS.IsInterposable()) + auto &FS = FnIt->second; + auto ParamIt = FS.Params.find(ParamNo); + if (ParamIt == FS.Params.end()) return UnknownRange; - if (ParamNo >= FS.Params.size()) // possibly vararg + auto &Access = ParamIt->second.Range; + if (Access.isEmptySet()) + return Access; + if (Access.isFullSet()) return UnknownRange; - return FS.Params[ParamNo].Use.Range; + return addOverflowNever(Access, Offsets); } -bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US, - bool UpdateToFullSet) { +template <typename CalleeTy> +bool StackSafetyDataFlowAnalysis<CalleeTy>::updateOneUse(UseInfo<CalleeTy> &US, + bool UpdateToFullSet) { bool Changed = false; for (auto &CS : US.Calls) { assert(!CS.Offset.isEmptySet() && "Param range can't be empty-set, invalid offset range"); - ConstantRange CalleeRange = getArgumentAccessRange(CS.Callee, CS.ParamNo); - CalleeRange = CalleeRange.add(CS.Offset); + ConstantRange CalleeRange = + getArgumentAccessRange(CS.Callee, CS.ParamNo, CS.Offset); if (!US.Range.contains(CalleeRange)) { Changed = true; if (UpdateToFullSet) @@ -478,19 +507,18 @@ bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US, return Changed; } -void StackSafetyDataFlowAnalysis::updateOneNode( - const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) { +template <typename CalleeTy> +void StackSafetyDataFlowAnalysis<CalleeTy>::updateOneNode( + const CalleeTy *Callee, FunctionInfo<CalleeTy> &FS) { bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations; bool Changed = false; - for (auto &AS : FS.Allocas) - Changed |= updateOneUse(AS.Use, UpdateToFullSet); - for (auto &PS : FS.Params) - Changed |= updateOneUse(PS.Use, UpdateToFullSet); + for (auto &KV : FS.Params) + Changed |= updateOneUse(KV.second, UpdateToFullSet); if (Changed) { LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount - << (UpdateToFullSet ? ", full-set" : "") << "] " - << FS.getName() << "\n"); + << (UpdateToFullSet ? ", full-set" : "") << "] " << &FS + << "\n"); // Callers of this function may need updating. 
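The surrounding hunks implement a small interprocedural fixpoint: each tracked parameter's access range absorbs the ranges of the callee parameters it is forwarded to, callers of a changed node are re-queued, and after StackSafetyMaxIterations updates a node is widened to the full set so the iteration terminates. A standalone sketch of that worklist loop with plain intervals instead of ConstantRange, ignoring per-call offsets and assuming every callee has a node (both are simplifications of the real analysis):

#include <algorithm>
#include <cstdint>
#include <deque>
#include <map>
#include <set>
#include <vector>

// Standalone sketch, not LLVM code. One Node per tracked parameter.
struct Range {
  int64_t Lo = 0, Hi = 0;
  bool Full = false; // stands in for the full/unknown ConstantRange
};

struct Node {
  Range Use;                // accesses known so far
  std::vector<int> Callees; // parameters this one is forwarded to
  int UpdateCount = 0;
};

void runDataFlow(std::map<int, Node> &Nodes, int MaxIterations) {
  std::map<int, std::set<int>> Callers; // callee -> callers (reverse edges)
  std::deque<int> WorkList;
  for (auto &KV : Nodes) {
    for (int Callee : KV.second.Callees)
      Callers[Callee].insert(KV.first);
    WorkList.push_back(KV.first);
  }
  while (!WorkList.empty()) {
    int Id = WorkList.front();
    WorkList.pop_front();
    Node &N = Nodes[Id];
    bool Widen = N.UpdateCount > MaxIterations; // too many updates: widen
    bool Changed = false;
    for (int Callee : N.Callees) {
      const Range &C = Nodes[Callee].Use;
      bool Contained =
          N.Use.Full || (!C.Full && C.Lo >= N.Use.Lo && C.Hi <= N.Use.Hi);
      if (Contained)
        continue;
      Changed = true;
      if (Widen || C.Full)
        N.Use = {0, 0, true}; // give up: anything may be accessed
      else
        N.Use = {std::min(N.Use.Lo, C.Lo), std::max(N.Use.Hi, C.Hi), false};
    }
    if (Changed) {
      ++N.UpdateCount;
      for (int Caller : Callers[Id]) // callers may need updating
        WorkList.push_back(Caller);
    }
  }
}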
for (auto &CallerID : Callers[Callee]) WorkList.insert(CallerID); @@ -499,19 +527,14 @@ void StackSafetyDataFlowAnalysis::updateOneNode( } } -void StackSafetyDataFlowAnalysis::runDataFlow() { - Callers.clear(); - WorkList.clear(); - - SmallVector<const GlobalValue *, 16> Callees; +template <typename CalleeTy> +void StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() { + SmallVector<const CalleeTy *, 16> Callees; for (auto &F : Functions) { Callees.clear(); - StackSafetyInfo::FunctionInfo &FS = F.second; - for (auto &AS : FS.Allocas) - for (auto &CS : AS.Use.Calls) - Callees.push_back(CS.Callee); - for (auto &PS : FS.Params) - for (auto &CS : PS.Use.Calls) + auto &FS = F.second; + for (auto &KV : FS.Params) + for (auto &CS : KV.second.Calls) Callees.push_back(CS.Callee); llvm::sort(Callees); @@ -524,65 +547,284 @@ void StackSafetyDataFlowAnalysis::runDataFlow() { updateAllNodes(); while (!WorkList.empty()) { - const GlobalValue *Callee = WorkList.back(); + const CalleeTy *Callee = WorkList.back(); WorkList.pop_back(); updateOneNode(Callee); } } #ifndef NDEBUG -void StackSafetyDataFlowAnalysis::verifyFixedPoint() { +template <typename CalleeTy> +void StackSafetyDataFlowAnalysis<CalleeTy>::verifyFixedPoint() { WorkList.clear(); updateAllNodes(); assert(WorkList.empty()); } #endif -StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { +template <typename CalleeTy> +const typename StackSafetyDataFlowAnalysis<CalleeTy>::FunctionMap & +StackSafetyDataFlowAnalysis<CalleeTy>::run() { runDataFlow(); LLVM_DEBUG(verifyFixedPoint()); + return Functions; +} - StackSafetyGlobalInfo SSI; - for (auto &F : Functions) - SSI.emplace(F.first, std::move(F.second)); - return SSI; +FunctionSummary *resolveCallee(GlobalValueSummary *S) { + while (S) { + if (!S->isLive() || !S->isDSOLocal()) + return nullptr; + if (FunctionSummary *FS = dyn_cast<FunctionSummary>(S)) + return FS; + AliasSummary *AS = dyn_cast<AliasSummary>(S); + if (!AS) + return nullptr; + S = AS->getBaseObject(); + if (S == AS) + return nullptr; + } + return nullptr; } -void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) { - size_t Count = 0; - for (auto &F : M.functions()) - if (!F.isDeclaration()) { - SSI.find(&F)->second.print(O); - O << "\n"; - ++Count; +const Function *findCalleeInModule(const GlobalValue *GV) { + while (GV) { + if (GV->isDeclaration() || GV->isInterposable() || !GV->isDSOLocal()) + return nullptr; + if (const Function *F = dyn_cast<Function>(GV)) + return F; + const GlobalAlias *A = dyn_cast<GlobalAlias>(GV); + if (!A) + return nullptr; + GV = A->getBaseObject(); + if (GV == A) + return nullptr; + } + return nullptr; +} + +GlobalValueSummary *getGlobalValueSummary(const ModuleSummaryIndex *Index, + uint64_t ValueGUID) { + auto VI = Index->getValueInfo(ValueGUID); + if (!VI || VI.getSummaryList().empty()) + return nullptr; + assert(VI.getSummaryList().size() == 1); + auto &Summary = VI.getSummaryList()[0]; + return Summary.get(); +} + +const ConstantRange *findParamAccess(const FunctionSummary &FS, + uint32_t ParamNo) { + assert(FS.isLive()); + assert(FS.isDSOLocal()); + for (auto &PS : FS.paramAccesses()) + if (ParamNo == PS.ParamNo) + return &PS.Use; + return nullptr; +} + +void resolveAllCalls(UseInfo<GlobalValue> &Use, + const ModuleSummaryIndex *Index) { + ConstantRange FullSet(Use.Range.getBitWidth(), true); + for (auto &C : Use.Calls) { + const Function *F = findCalleeInModule(C.Callee); + if (F) { + C.Callee = F; + continue; } - for (auto &A : M.aliases()) { - 
SSI.find(&A)->second.print(O); - O << "\n"; - ++Count; + + if (!Index) + return Use.updateRange(FullSet); + GlobalValueSummary *GVS = getGlobalValueSummary(Index, C.Callee->getGUID()); + + FunctionSummary *FS = resolveCallee(GVS); + if (!FS) + return Use.updateRange(FullSet); + const ConstantRange *Found = findParamAccess(*FS, C.ParamNo); + if (!Found) + return Use.updateRange(FullSet); + ConstantRange Access = Found->sextOrTrunc(Use.Range.getBitWidth()); + Use.updateRange(addOverflowNever(Access, C.Offset)); + C.Callee = nullptr; } - assert(Count == SSI.size() && "Unexpected functions in the result"); + + Use.Calls.erase(std::remove_if(Use.Calls.begin(), Use.Calls.end(), + [](auto &T) { return !T.Callee; }), + Use.Calls.end()); +} + +GVToSSI createGlobalStackSafetyInfo( + std::map<const GlobalValue *, FunctionInfo<GlobalValue>> Functions, + const ModuleSummaryIndex *Index) { + GVToSSI SSI; + if (Functions.empty()) + return SSI; + + // FIXME: Simplify printing and remove copying here. + auto Copy = Functions; + + for (auto &FnKV : Copy) + for (auto &KV : FnKV.second.Params) + resolveAllCalls(KV.second, Index); + + uint32_t PointerSize = Copy.begin() + ->first->getParent() + ->getDataLayout() + .getMaxPointerSizeInBits(); + StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy)); + + for (auto &F : SSDFA.run()) { + auto FI = F.second; + auto &SrcF = Functions[F.first]; + for (auto &KV : FI.Allocas) { + auto &A = KV.second; + resolveAllCalls(A, Index); + for (auto &C : A.Calls) { + A.updateRange( + SSDFA.getArgumentAccessRange(C.Callee, C.ParamNo, C.Offset)); + } + // FIXME: This is needed only to preserve calls in print() results. + A.Calls = SrcF.Allocas.find(KV.first)->second.Calls; + } + for (auto &KV : FI.Params) { + auto &P = KV.second; + P.Calls = SrcF.Params.find(KV.first)->second.Calls; + } + SSI[F.first] = std::move(FI); + } + + return SSI; } } // end anonymous namespace StackSafetyInfo::StackSafetyInfo() = default; + +StackSafetyInfo::StackSafetyInfo(Function *F, + std::function<ScalarEvolution &()> GetSE) + : F(F), GetSE(GetSE) {} + StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default; -StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default; -StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info) - : Info(new FunctionInfo(std::move(Info))) {} +StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default; StackSafetyInfo::~StackSafetyInfo() = default; -void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); } +const StackSafetyInfo::InfoTy &StackSafetyInfo::getInfo() const { + if (!Info) { + StackSafetyLocalAnalysis SSLA(*F, GetSE()); + Info.reset(new InfoTy{SSLA.run()}); + } + return *Info; +} + +void StackSafetyInfo::print(raw_ostream &O) const { + getInfo().Info.print(O, F->getName(), dyn_cast<Function>(F)); +} + +const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const { + if (!Info) { + std::map<const GlobalValue *, FunctionInfo<GlobalValue>> Functions; + for (auto &F : M->functions()) { + if (!F.isDeclaration()) { + auto FI = GetSSI(F).getInfo().Info; + Functions.emplace(&F, std::move(FI)); + } + } + Info.reset(new InfoTy{ + createGlobalStackSafetyInfo(std::move(Functions), Index), {}}); + for (auto &FnKV : Info->Info) { + for (auto &KV : FnKV.second.Allocas) { + ++NumAllocaTotal; + const AllocaInst *AI = KV.first; + if (getStaticAllocaSizeRange(*AI).contains(KV.second.Range)) { + Info->SafeAllocas.insert(AI); + ++NumAllocaStackSafe; + } + } + } + if (StackSafetyPrint) + 
print(errs()); + } + return *Info; +} + +std::vector<FunctionSummary::ParamAccess> +StackSafetyInfo::getParamAccesses() const { + // Implementation transforms internal representation of parameter information + // into FunctionSummary format. + std::vector<FunctionSummary::ParamAccess> ParamAccesses; + for (const auto &KV : getInfo().Info.Params) { + auto &PS = KV.second; + // Parameter accessed by any or unknown offset, represented as FullSet by + // StackSafety, is handled as the parameter for which we have no + // StackSafety info at all. So drop it to reduce summary size. + if (PS.Range.isFullSet()) + continue; + + ParamAccesses.emplace_back(KV.first, PS.Range); + FunctionSummary::ParamAccess &Param = ParamAccesses.back(); + + Param.Calls.reserve(PS.Calls.size()); + for (auto &C : PS.Calls) { + // Parameter forwarded into another function by any or unknown offset + // will make ParamAccess::Range as FullSet anyway. So we can drop the + // entire parameter like we did above. + // TODO(vitalybuka): Return already filtered parameters from getInfo(). + if (C.Offset.isFullSet()) { + ParamAccesses.pop_back(); + break; + } + Param.Calls.emplace_back(C.ParamNo, C.Callee->getGUID(), C.Offset); + } + } + return ParamAccesses; +} + +StackSafetyGlobalInfo::StackSafetyGlobalInfo() = default; + +StackSafetyGlobalInfo::StackSafetyGlobalInfo( + Module *M, std::function<const StackSafetyInfo &(Function &F)> GetSSI, + const ModuleSummaryIndex *Index) + : M(M), GetSSI(GetSSI), Index(Index) { + if (StackSafetyRun) + getInfo(); +} + +StackSafetyGlobalInfo::StackSafetyGlobalInfo(StackSafetyGlobalInfo &&) = + default; + +StackSafetyGlobalInfo & +StackSafetyGlobalInfo::operator=(StackSafetyGlobalInfo &&) = default; + +StackSafetyGlobalInfo::~StackSafetyGlobalInfo() = default; + +bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const { + const auto &Info = getInfo(); + return Info.SafeAllocas.count(&AI); +} + +void StackSafetyGlobalInfo::print(raw_ostream &O) const { + auto &SSI = getInfo().Info; + if (SSI.empty()) + return; + const Module &M = *SSI.begin()->first->getParent(); + for (auto &F : M.functions()) { + if (!F.isDeclaration()) { + SSI.find(&F)->second.print(O, F.getName(), &F); + O << "\n"; + } + } +} + +LLVM_DUMP_METHOD void StackSafetyGlobalInfo::dump() const { print(dbgs()); } AnalysisKey StackSafetyAnalysis::Key; StackSafetyInfo StackSafetyAnalysis::run(Function &F, FunctionAnalysisManager &AM) { - StackSafetyLocalAnalysis SSLA(F, AM.getResult<ScalarEvolutionAnalysis>(F)); - return SSLA.run(); + return StackSafetyInfo(&F, [&AM, &F]() -> ScalarEvolution & { + return AM.getResult<ScalarEvolutionAnalysis>(F); + }); } PreservedAnalyses StackSafetyPrinterPass::run(Function &F, @@ -599,7 +841,7 @@ StackSafetyInfoWrapperPass::StackSafetyInfoWrapperPass() : FunctionPass(ID) { } void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); AU.setPreservesAll(); } @@ -608,9 +850,8 @@ void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const { } bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) { - StackSafetyLocalAnalysis SSLA( - F, getAnalysis<ScalarEvolutionWrapperPass>().getSE()); - SSI = StackSafetyInfo(SSLA.run()); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + SSI = {&F, [SE]() -> ScalarEvolution & { return *SE; }}; return false; } @@ -618,20 +859,20 @@ AnalysisKey StackSafetyGlobalAnalysis::Key; StackSafetyGlobalInfo 
StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + // FIXME: Lookup Module Summary. FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - - StackSafetyDataFlowAnalysis SSDFA( - M, [&FAM](Function &F) -> const StackSafetyInfo & { - return FAM.getResult<StackSafetyAnalysis>(F); - }); - return SSDFA.run(); + return {&M, + [&FAM](Function &F) -> const StackSafetyInfo & { + return FAM.getResult<StackSafetyAnalysis>(F); + }, + nullptr}; } PreservedAnalyses StackSafetyGlobalPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n"; - print(AM.getResult<StackSafetyGlobalAnalysis>(M), OS, M); + AM.getResult<StackSafetyGlobalAnalysis>(M).print(OS); return PreservedAnalyses::all(); } @@ -643,25 +884,96 @@ StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass() *PassRegistry::getPassRegistry()); } +StackSafetyGlobalInfoWrapperPass::~StackSafetyGlobalInfoWrapperPass() = default; + void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O, const Module *M) const { - ::print(SSI, O, *M); + SSGI.print(O); } void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { + AU.setPreservesAll(); AU.addRequired<StackSafetyInfoWrapperPass>(); } bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { - StackSafetyDataFlowAnalysis SSDFA( - M, [this](Function &F) -> const StackSafetyInfo & { - return getAnalysis<StackSafetyInfoWrapperPass>(F).getResult(); - }); - SSI = SSDFA.run(); + const ModuleSummaryIndex *ImportSummary = nullptr; + if (auto *IndexWrapperPass = + getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) + ImportSummary = IndexWrapperPass->getIndex(); + + SSGI = {&M, + [this](Function &F) -> const StackSafetyInfo & { + return getAnalysis<StackSafetyInfoWrapperPass>(F).getResult(); + }, + ImportSummary}; return false; } +bool llvm::needsParamAccessSummary(const Module &M) { + if (StackSafetyRun) + return true; + for (auto &F : M.functions()) + if (F.hasFnAttribute(Attribute::SanitizeMemTag)) + return true; + return false; +} + +void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { + const ConstantRange FullSet(FunctionSummary::ParamAccess::RangeWidth, true); + std::map<const FunctionSummary *, FunctionInfo<FunctionSummary>> Functions; + + // Convert the ModuleSummaryIndex to a FunctionMap + for (auto &GVS : Index) { + for (auto &GV : GVS.second.SummaryList) { + FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get()); + if (!FS) + continue; + if (FS->isLive() && FS->isDSOLocal()) { + FunctionInfo<FunctionSummary> FI; + for (auto &PS : FS->paramAccesses()) { + auto &US = + FI.Params + .emplace(PS.ParamNo, FunctionSummary::ParamAccess::RangeWidth) + .first->second; + US.Range = PS.Use; + for (auto &Call : PS.Calls) { + assert(!Call.Offsets.isFullSet()); + FunctionSummary *S = resolveCallee( + Index.findSummaryInModule(Call.Callee, FS->modulePath())); + if (!S) { + US.Range = FullSet; + US.Calls.clear(); + break; + } + US.Calls.emplace_back(S, Call.ParamNo, Call.Offsets); + } + } + Functions.emplace(FS, std::move(FI)); + } + // Reset data for all summaries. Alive and DSO local will be set back from + // of data flow results below. Anything else will not be accessed + // by ThinLTO backend, so we can save on bitcode size. 
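generateParamAccessSummary above rebuilds a per-parameter call graph out of the combined summary index and then feeds it to StackSafetyDataFlowAnalysis<FunctionSummary>, the same worklist fixpoint used for the in-module case: a node is re-queued whenever one of its callees' ranges grows, and after StackSafetyMaxIterations updates a node is widened straight to the full set so the iteration is guaranteed to terminate. The standalone model below shows that worklist-with-saturation shape in plain C++; the Interval, Call, and Node types, the example call graph, and the MaxUpdates constant are invented for illustration and are not LLVM code (the real analysis also deduplicates the worklist with a SetVector).

// Standalone model, not LLVM code: a monotone worklist fixpoint over
// per-parameter access intervals that saturates to "unknown" after a fixed
// number of revisits, mirroring the shape of
// StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() and updateOneNode().
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <deque>
#include <map>
#include <string>
#include <vector>

struct Interval {
  int64_t Lo = 0, Hi = 0; // half-open byte range [Lo, Hi)
  bool Full = false;      // "may access anything"
  void mergeIn(const Interval &O) {
    if (Full)
      return;
    if (O.Full) {
      Full = true;
      return;
    }
    Lo = std::min(Lo, O.Lo);
    Hi = std::max(Hi, O.Hi);
  }
  bool contains(const Interval &O) const {
    return Full || (!O.Full && Lo <= O.Lo && O.Hi <= Hi);
  }
};

struct Call { std::string Callee; int64_t Offset; };
struct Node { Interval Range; std::vector<Call> Calls; unsigned Updates = 0; };

int main() {
  const unsigned MaxUpdates = 20; // counterpart of StackSafetyMaxIterations
  // "mid" forwards its pointer to "leaf" at offset 4; "root" forwards to "mid".
  std::map<std::string, Node> Nodes = {
      {"leaf", {{0, 8}, {}, 0}},
      {"mid", {{0, 4}, {{"leaf", 4}}, 0}},
      {"root", {{0, 0}, {{"mid", 0}}, 0}},
  };
  // Reverse edges: who must be revisited when a callee's range grows.
  std::map<std::string, std::vector<std::string>> Callers;
  for (auto &KV : Nodes)
    for (auto &C : KV.second.Calls)
      Callers[C.Callee].push_back(KV.first);

  std::deque<std::string> WorkList; // the real code deduplicates via SetVector
  for (auto &KV : Nodes)
    WorkList.push_back(KV.first);
  while (!WorkList.empty()) {
    std::string Name = WorkList.front();
    WorkList.pop_front();
    Node &N = Nodes[Name];
    bool Saturate = ++N.Updates > MaxUpdates;
    bool Changed = false;
    for (auto &C : N.Calls) {
      // What the callee does with the forwarded pointer, shifted by the
      // offset this call site applies.
      Interval CalleeRange = Nodes[C.Callee].Range;
      if (!CalleeRange.Full) {
        CalleeRange.Lo += C.Offset;
        CalleeRange.Hi += C.Offset;
      }
      if (!N.Range.contains(CalleeRange)) {
        Changed = true;
        if (Saturate)
          N.Range.Full = true;
        else
          N.Range.mergeIn(CalleeRange);
      }
    }
    if (Changed)
      for (auto &Caller : Callers[Name])
        WorkList.push_back(Caller);
  }

  for (auto &KV : Nodes)
    std::printf("%s: [%lld, %lld)%s\n", KV.first.c_str(),
                (long long)KV.second.Range.Lo, (long long)KV.second.Range.Hi,
                KV.second.Range.Full ? " full" : "");
  return 0;
}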
+ FS->setParamAccesses({}); + } + } + StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA( + FunctionSummary::ParamAccess::RangeWidth, std::move(Functions)); + for (auto &KV : SSDFA.run()) { + std::vector<FunctionSummary::ParamAccess> NewParams; + NewParams.reserve(KV.second.Params.size()); + for (auto &Param : KV.second.Params) { + NewParams.emplace_back(); + FunctionSummary::ParamAccess &New = NewParams.back(); + New.ParamNo = Param.first; + New.Use = Param.second.Range; // Only range is needed. + } + const_cast<FunctionSummary *>(KV.first)->setParamAccesses( + std::move(NewParams)); + } +} + static const char LocalPassArg[] = "stack-safety-local"; static const char LocalPassName[] = "Stack Safety Local Analysis"; INITIALIZE_PASS_BEGIN(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName, @@ -672,7 +984,8 @@ INITIALIZE_PASS_END(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName, static const char GlobalPassName[] = "Stack Safety Analysis"; INITIALIZE_PASS_BEGIN(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE, - GlobalPassName, false, false) + GlobalPassName, false, true) INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ImmutableModuleSummaryIndexWrapperPass) INITIALIZE_PASS_END(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE, - GlobalPassName, false, false) + GlobalPassName, false, true) diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 8447dc87069db..ccf520dcea66e 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -244,12 +244,12 @@ struct DivergencePropagator { ); auto ItBeginRPO = FuncRPOT.begin(); + auto ItEndRPO = FuncRPOT.end(); // skip until term (TODO RPOT won't let us start at @term directly) - for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {} - - auto ItEndRPO = FuncRPOT.end(); - assert(ItBeginRPO != ItEndRPO); + for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) { + assert(ItBeginRPO != ItEndRPO && "Unable to find RootBlock"); + } // propagate definitions at the immediate successors of the node in RPO auto ItBlockRPO = ItBeginRPO; @@ -369,7 +369,7 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); - auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>( + auto JoinBlocks = Propagator.computeJoinPoints<const_succ_range>( TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp index 22766e5f07f54..a3edce76cd882 100644 --- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp +++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp new file mode 100644 index 0000000000000..19e6d626e2386 --- /dev/null +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -0,0 +1,289 @@ +//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for interfacing with tensorflow C APIs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_experimental.h" + +#include <cassert> + +using namespace llvm; + +namespace { + +using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>; +using TFSessionOptionsPtr = + std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>; +using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; + +struct TFInitializer { + TFInitializer() { + assert(!IsInitialized && "TFInitialized should be called only once"); + int Argc = 1; + const char *Name = ""; + const char **NamePtr = &Name; + TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); + IsInitialized = true; + } + bool IsInitialized = false; +}; + +llvm::ManagedStatic<TFInitializer> TFLibInitializer; + +bool ensureInitTF() { return TFLibInitializer->IsInitialized; } + +TFGraphPtr createTFGraph() { + return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); +} + +TFStatusPtr createTFStatus() { + return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); +} + +TFSessionOptionsPtr createTFSessionOptions() { + return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); +} +} // namespace + +namespace llvm { +class EvaluationResultImpl { +public: + EvaluationResultImpl(size_t OutputSize) + : OutputSize(OutputSize), Output(OutputSize){}; + + ~EvaluationResultImpl() { + for (auto *P : Output) + if (P) + TF_DeleteTensor(P); + } + + EvaluationResultImpl(const EvaluationResultImpl &) = delete; + EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; + std::vector<TF_Tensor *> &getOutput() { return Output; } + +private: + const size_t OutputSize; + std::vector<TF_Tensor *> Output; +}; + +class TFModelEvaluatorImpl { +public: + TFModelEvaluatorImpl(StringRef SavedModelPath, + const std::vector<std::string> &InputNames, + const std::vector<std::string> &OutputNames, + const char *Tags); + + bool isValid() const { return IsValid; } + size_t OutputSize() const { return OutputFeed.size(); } + + void evaluate(TF_Tensor **Output, TF_Status *Status) { + TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), + Input.size(), OutputFeed.data(), Output, OutputFeed.size(), + nullptr, 0, nullptr, Status); + } + + void initInput(size_t Index, TF_DataType Type, + const std::vector<int64_t> &Dimensions); + const std::vector<TF_Tensor *> &getInput() const { return Input; } + + ~TFModelEvaluatorImpl(); + +private: + /// The objects necessary for carrying out an evaluation of the SavedModel. + /// They are expensive to set up, and we maintain them across all the + /// evaluations of the model. + TF_Session *Session = nullptr; + TFGraphPtr Graph; + TFSessionOptionsPtr Options; + + /// The specification of the input nodes. + std::vector<TF_Output> InputFeed; + + /// The input tensors. They must match by index the corresponding InputFeed + /// value. We set up the tensors once and just mutate their scalars before + /// each evaluation. The input tensors keep their value after an evaluation. + std::vector<TF_Tensor *> Input; + + /// The specification of the output nodes.
When evaluating, the tensors in the + /// output tensor vector must match by index the corresponding element in the + /// OutputFeed. + std::vector<TF_Output> OutputFeed; + + void invalidate() { IsValid = false; } + + bool IsValid = true; + + /// Reusable utility for ensuring we can bind the requested Name to a node in + /// the SavedModel Graph. + bool checkReportAndInvalidate(const TF_Output &Output, StringRef Name); +}; +} // namespace llvm + +TFModelEvaluatorImpl::TFModelEvaluatorImpl( + StringRef SavedModelPath, const std::vector<std::string> &InputNames, + const std::vector<std::string> &OutputNames, const char *Tags) + : Graph(createTFGraph()), Options(createTFSessionOptions()), + InputFeed(InputNames.size()), Input(InputNames.size()), + OutputFeed(OutputNames.size()) { + if (!ensureInitTF()) { + errs() << "Tensorflow should have been initialized"; + return; + } + auto Status = createTFStatus(); + + Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, + SavedModelPath.str().c_str(), &Tags, 1, + Graph.get(), nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + invalidate(); + } + for (size_t I = 0; I < InputNames.size(); ++I) { + InputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0}; + if (!checkReportAndInvalidate(InputFeed[I], InputNames[I])) + return; + } + for (size_t I = 0; I < OutputNames.size(); ++I) { + OutputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0}; + if (!checkReportAndInvalidate(OutputFeed[I], OutputNames[I])) + return; + } +} + +TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, + const std::vector<std::string> &InputNames, + const std::vector<std::string> &OutputNames, + const char *Tags) + : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputNames, OutputNames, + Tags)) { + if (!Impl->isValid()) + Impl.reset(); +} + +TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { + for (auto *T : Input) { + TF_DeleteTensor(T); + } + if (Session == nullptr) + return; + auto Status = createTFStatus(); + TF_DeleteSession(Session, Status.get()); + Session = nullptr; + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) + errs() << "Could not delete TF session"; +} + +bool TFModelEvaluatorImpl::checkReportAndInvalidate(const TF_Output &Output, + StringRef Name) { + if (Output.oper) + return true; + errs() << "Could not find TF_Output named: " + Name; + IsValid = false; + return IsValid; +} + +Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() { + if (!isValid()) + return None; + std::unique_ptr<EvaluationResultImpl> Ret = + std::make_unique<EvaluationResultImpl>(Impl->OutputSize()); + auto Status = createTFStatus(); + Impl->evaluate(Ret->getOutput().data(), Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + Impl.reset(); + return None; + } + return EvaluationResult(std::move(Ret)); +} + +void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, + const std::vector<int64_t> &Dimensions) { + int64_t TotalSize = TF_DataTypeSize(Type); + for (auto &D : Dimensions) + TotalSize *= D; + + Input[Index] = + TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); + std::memset(TF_TensorData(Input[Index]), 0, TotalSize); +} + +void *TFModelEvaluator::getUntypedInput(size_t Index) { + return TF_TensorData(Impl->getInput()[Index]); +} + +TFModelEvaluator::EvaluationResult::EvaluationResult( + std::unique_ptr<EvaluationResultImpl> Impl) + : 
Impl(std::move(Impl)) {} + +TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) + : Impl(std::move(Other.Impl)) {} + +void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { + return TF_TensorData(Impl->getOutput()[Index]); +} + +void TFModelEvaluator::initInput(size_t Index, int TypeIndex, + const std::vector<int64_t> &Dimensions) { + Impl->initInput(Index, static_cast<TF_DataType>(TypeIndex), Dimensions); +} + +template <> int TFModelEvaluator::getModelTypeIndex<float>() { + return TF_FLOAT; +} + +template <> int TFModelEvaluator::getModelTypeIndex<double>() { + return TF_DOUBLE; +} + +template <> int TFModelEvaluator::getModelTypeIndex<int8_t>() { + return TF_INT8; +} + +template <> int TFModelEvaluator::getModelTypeIndex<uint8_t>() { + return TF_UINT8; +} + +template <> int TFModelEvaluator::getModelTypeIndex<int16_t>() { + return TF_INT16; +} + +template <> int TFModelEvaluator::getModelTypeIndex<uint16_t>() { + return TF_UINT16; +} + +template <> int TFModelEvaluator::getModelTypeIndex<int32_t>() { + return TF_INT32; +} + +template <> int TFModelEvaluator::getModelTypeIndex<uint32_t>() { + return TF_UINT32; +} + +template <> int TFModelEvaluator::getModelTypeIndex<int64_t>() { + return TF_INT64; +} + +template <> int TFModelEvaluator::getModelTypeIndex<uint64_t>() { + return TF_UINT64; +} + +TFModelEvaluator::EvaluationResult::~EvaluationResult() {} +TFModelEvaluator::~TFModelEvaluator() {} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index c7238db43aab6..60cfb04634c4a 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -69,11 +69,10 @@ static bool hasBcmp(const Triple &TT) { static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef<StringLiteral> StandardNames) { // Verify that the StandardNames array is in alphabetical order. - assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), - [](StringRef LHS, StringRef RHS) { - return LHS < RHS; - }) && - "TargetLibraryInfoImpl function names must be sorted"); + assert( + llvm::is_sorted(StandardNames, + [](StringRef LHS, StringRef RHS) { return LHS < RHS; }) && + "TargetLibraryInfoImpl function names must be sorted"); // Set IO unlocked variants as unavailable // Set them as available per system below @@ -105,14 +104,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setShouldExtI32Return(ShouldExtI32Return); TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); - if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) + if (T.isAMDGPU()) TLI.disableAllFunctions(); // There are no library implementations of memcpy and memset for AMD gpus and // these can be difficult to lower in the backend. 
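Returning to the new TFUtils.cpp above: the public surface it implements is TFModelEvaluator, which loads a SavedModel once, keeps the input tensors alive across evaluations, and hands back per-call output tensors. Below is a hedged usage sketch built only from the signatures visible in that hunk; the SavedModel path, node names, and tensor shape are placeholders rather than anything the patch ships, and the snippet can only be built in a tree where the TensorFlow C API is available, the same precondition under which TFUtils.cpp itself is compiled.

// Usage sketch for the TFModelEvaluator wrapper defined in TFUtils.cpp above.
// The SavedModel path, node names, and tensor shape are made-up placeholders;
// linking this requires an LLVM build configured with the TensorFlow C API.
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/raw_ostream.h"

#include <string>
#include <vector>

using namespace llvm;

int main() {
  std::vector<std::string> InputNames = {"serving_default_input"};    // placeholder
  std::vector<std::string> OutputNames = {"StatefulPartitionedCall"}; // placeholder
  TFModelEvaluator Evaluator("/tmp/saved_model", InputNames, OutputNames,
                             /*Tags=*/"serve");
  if (!Evaluator.isValid()) {
    errs() << "failed to load the SavedModel\n";
    return 1;
  }

  // Describe the single input tensor: one row of four floats. Input tensors
  // are allocated once and zero-initialized; mutate them before each run.
  Evaluator.initInput(/*Index=*/0, Evaluator.getModelTypeIndex<float>(),
                      /*Dimensions=*/{1, 4});
  auto *In = static_cast<float *>(Evaluator.getUntypedInput(0));
  for (int I = 0; I < 4; ++I)
    In[I] = static_cast<float>(I);

  if (auto Result = Evaluator.evaluate()) {
    auto *Out = static_cast<float *>(Result->getUntypedTensorValue(0));
    errs() << "first output scalar: " << Out[0] << "\n";
  } else {
    errs() << "evaluation failed\n";
  }
  return 0;
}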
- if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { + if (T.isAMDGPU()) { TLI.setUnavailable(LibFunc_memcpy); TLI.setUnavailable(LibFunc_memset); TLI.setUnavailable(LibFunc_memset_pattern16); @@ -210,6 +207,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_logf); TLI.setUnavailable(LibFunc_modff); TLI.setUnavailable(LibFunc_powf); + TLI.setUnavailable(LibFunc_remainderf); TLI.setUnavailable(LibFunc_sinf); TLI.setUnavailable(LibFunc_sinhf); TLI.setUnavailable(LibFunc_sqrtf); @@ -239,6 +237,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_logl); TLI.setUnavailable(LibFunc_modfl); TLI.setUnavailable(LibFunc_powl); + TLI.setUnavailable(LibFunc_remainderl); TLI.setUnavailable(LibFunc_sinl); TLI.setUnavailable(LibFunc_sinhl); TLI.setUnavailable(LibFunc_sqrtl); @@ -470,6 +469,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tmpfile64); // Relaxed math functions are included in math-finite.h on Linux (GLIBC). + // Note that math-finite.h is no longer supported by top-of-tree GLIBC, + // so we keep these functions around just so that they're recognized by + // the ConstantFolder. TLI.setUnavailable(LibFunc_acos_finite); TLI.setUnavailable(LibFunc_acosf_finite); TLI.setUnavailable(LibFunc_acosl_finite); @@ -659,6 +661,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy() && FTy.getReturnType()->isIntegerTy(32)); + case LibFunc_strlen_chk: + --NumParams; + if (!IsSizeTTy(FTy.getParamType(NumParams))) + return false; + LLVM_FALLTHROUGH; case LibFunc_strlen: return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() && FTy.getReturnType()->isIntegerTy()); @@ -893,6 +900,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(1)->isPointerTy()); case LibFunc_write: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy()); + case LibFunc_aligned_alloc: + return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); case LibFunc_bcopy: case LibFunc_bcmp: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -1209,6 +1218,14 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // void operator delete[](void*, align_val_t, nothrow) case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: + // void operator delete(void*, unsigned int, align_val_t) + case LibFunc_ZdlPvjSt11align_val_t: + // void operator delete(void*, unsigned long, align_val_t) + case LibFunc_ZdlPvmSt11align_val_t: + // void operator delete[](void*, unsigned int, align_val_t); + case LibFunc_ZdaPvjSt11align_val_t: + // void operator delete[](void*, unsigned long, align_val_t); + case LibFunc_ZdaPvmSt11align_val_t: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); case LibFunc_memset_pattern16: @@ -1332,6 +1349,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: + case LibFunc_roundeven: + case LibFunc_roundevenf: + case LibFunc_roundevenl: case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinh: @@ -1374,6 +1394,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_fmod: case LibFunc_fmodf: case LibFunc_fmodl: + case LibFunc_remainder: + case LibFunc_remainderf: + case LibFunc_remainderl: case 
LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: @@ -1478,9 +1501,9 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl, LibFunc &F) const { // Intrinsics don't overlap w/libcalls; if our module has a large number of // intrinsics, this ends up being an interesting compile time win since we - // avoid string normalization and comparison. + // avoid string normalization and comparison. if (FDecl.isIntrinsic()) return false; - + const DataLayout *DL = FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr; return getLibFunc(FDecl.getName(), F) && diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index f2c63f789d892..2f051e53790b1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -11,8 +11,8 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -42,18 +42,109 @@ struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} }; -} +} // namespace bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { // If the loop has irreducible control flow, it can not be converted to // Hardware loop. - LoopBlocksRPO RPOT(L); + LoopBlocksRPO RPOT(L); RPOT.perform(&LI); if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) return false; return true; } +IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) : + II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) { + + FunctionType *FTy = I.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); + Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end()); + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) + FMF = FPMO->getFastMathFlags(); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI) : + II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) { + + if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) + FMF = FPMO->getFastMathFlags(); + + FunctionType *FTy = + CI.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, + unsigned Factor) : + RetTy(CI.getType()), IID(Id), VF(Factor) { + + if (auto *FPMO = dyn_cast<FPMathOperator>(&CI)) + FMF = FPMO->getFastMathFlags(); + + Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); + FunctionType *FTy = + CI.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, + unsigned Factor, + unsigned ScalarCost) : + RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { + + if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) + FMF = FPMO->getFastMathFlags(); + + Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); + FunctionType *FTy = + CI.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, + 
ArrayRef<Type *> Tys, + FastMathFlags Flags) : + RetTy(RTy), IID(Id), FMF(Flags) { + ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, + ArrayRef<Type *> Tys, + FastMathFlags Flags, + unsigned ScalarCost) : + RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { + ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, + ArrayRef<Type *> Tys, + FastMathFlags Flags, + unsigned ScalarCost, + const IntrinsicInst *I) : + II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { + ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, + ArrayRef<Type *> Tys) : + RetTy(RTy), IID(Id) { + ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, + ArrayRef<const Value *> Args) + : RetTy(Ty), IID(Id) { + + Arguments.insert(Arguments.begin(), Args.begin(), Args.end()); + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); +} + bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop, @@ -146,28 +237,6 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { return *this; } -int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const { - int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - -int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs, - const User *U) const { - int Cost = TTIImpl->getCallCost(FTy, NumArgs, U); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - -int TargetTransformInfo::getCallCost(const Function *F, - ArrayRef<const Value *> Arguments, - const User *U) const { - int Cost = TTIImpl->getCallCost(F, Arguments, U); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } @@ -177,34 +246,23 @@ int TargetTransformInfo::getInlinerVectorBonusPercent() const { } int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands) const { - return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); -} - -int TargetTransformInfo::getExtCost(const Instruction *I, - const Value *Src) const { - return TTIImpl->getExtCost(I, Src); -} - -int TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments, - const User *U) const { - int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) const { + return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } -unsigned -TargetTransformInfo::getEstimatedNumberOfCaseClusters( +unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const { return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } int 
TargetTransformInfo::getUserCost(const User *U, - ArrayRef<const Value *> Operands) const { - int Cost = TTIImpl->getUserCost(U, Operands); - assert(Cost >= 0 && "TTI should not produce negative costs!"); + ArrayRef<const Value *> Operands, + enum TargetCostKind CostKind) const { + int Cost = TTIImpl->getUserCost(U, Operands, CostKind); + assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && + "TTI should not produce negative costs!"); return Cost; } @@ -212,6 +270,10 @@ bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } +bool TargetTransformInfo::useGPUDivergenceAnalysis() const { + return TTIImpl->useGPUDivergenceAnalysis(); +} + bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } @@ -225,12 +287,17 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const { } bool TargetTransformInfo::collectFlatAddressOperands( - SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { + SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); } -bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace( - IntrinsicInst *II, Value *OldV, Value *NewV) const { +bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS, + unsigned ToAS) const { + return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS); +} + +Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); } @@ -239,22 +306,32 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const { } bool TargetTransformInfo::isHardwareLoopProfitable( - Loop *L, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const { + Loop *L, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const { return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } -bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, - ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, const LoopAccessInfo *LAI) const { +bool TargetTransformInfo::preferPredicateOverEpilogue( + Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *TLI, DominatorTree *DT, + const LoopAccessInfo *LAI) const { return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } +bool TargetTransformInfo::emitGetActiveLaneMask() const { + return TTIImpl->emitGetActiveLaneMask(); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); } +void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + PeelingPreferences &PP) const { + return TTIImpl->getPeelingPreferences(L, SE, PP); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } @@ -265,8 +342,7 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, - int64_t Scale, + bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, @@ -277,6 +353,10 @@ bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { 
return TTIImpl->isLSRCostLess(C1, C2); } +bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { + return TTIImpl->isProfitableLSRChainElement(I); +} + bool TargetTransformInfo::canMacroFuseCmp() const { return TTIImpl->canMacroFuseCmp(); } @@ -297,12 +377,12 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, - MaybeAlign Alignment) const { + Align Alignment) const { return TTIImpl->isLegalMaskedStore(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, - MaybeAlign Alignment) const { + Align Alignment) const { return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } @@ -316,12 +396,12 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { } bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, - MaybeAlign Alignment) const { + Align Alignment) const { return TTIImpl->isLegalMaskedGather(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, - MaybeAlign Alignment) const { + Align Alignment) const { return TTIImpl->isLegalMaskedScatter(DataType, Alignment); } @@ -348,8 +428,7 @@ bool TargetTransformInfo::prefersVectorizedAddressing() const { int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, - int64_t Scale, + bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); @@ -378,7 +457,8 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } -bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const { +bool TargetTransformInfo::shouldBuildLookupTablesForConstant( + Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } @@ -386,14 +466,15 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); } -unsigned TargetTransformInfo:: -getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { - return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); +unsigned +TargetTransformInfo::getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) const { + return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } -unsigned TargetTransformInfo:: -getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) const { +unsigned TargetTransformInfo::getOperandsScalarizationOverhead( + ArrayRef<const Value *> Args, unsigned VF) const { return TTIImpl->getOperandsScalarizationOverhead(Args, VF); } @@ -401,7 +482,8 @@ bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { return TTIImpl->supportsEfficientVectorElementLoadStore(); } -bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { +bool TargetTransformInfo::enableAggressiveInterleaving( + bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } @@ -427,8 +509,8 @@ bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned AddressSpace, unsigned Alignment, bool *Fast) const { - return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, - Alignment, Fast); + return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, + AddressSpace, Alignment, 
Fast); } TargetTransformInfo::PopcntSupportKind @@ -458,22 +540,27 @@ int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, return Cost; } -int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty); +int +TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty); +int +TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -482,11 +569,12 @@ unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { return TTIImpl->getNumberOfRegisters(ClassID); } -unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, + Type *Ty) const { return TTIImpl->getRegisterClassForType(Vector, Ty); } -const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { +const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { return TTIImpl->getRegisterClassName(ClassID); } @@ -516,13 +604,13 @@ unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } -llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level) - const { +llvm::Optional<unsigned> +TargetTransformInfo::getCacheSize(CacheLevel Level) const { return TTIImpl->getCacheSize(Level); } -llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity( - CacheLevel Level) const { +llvm::Optional<unsigned> +TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } @@ -530,24 +618,32 @@ unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } -unsigned TargetTransformInfo::getMinPrefetchStride() const { - return TTIImpl->getMinPrefetchStride(); +unsigned TargetTransformInfo::getMinPrefetchStride( + unsigned NumMemAccesses, unsigned NumStridedMemAccesses, + unsigned NumPrefetches, bool HasCall) const { + return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, + NumPrefetches, HasCall); } unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const { return TTIImpl->getMaxPrefetchIterationsAhead(); } +bool TargetTransformInfo::enableWritePrefetching() const { + return TTIImpl->enableWritePrefetching(); +} + unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } TargetTransformInfo::OperandValueKind 
-TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { +TargetTransformInfo::getOperandInfo(const Value *V, + OperandValueProperties &OpProps) { OperandValueKind OpInfo = OK_AnyValue; OpProps = OP_None; - if (auto *CI = dyn_cast<ConstantInt>(V)) { + if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; return OK_UniformConstantValue; @@ -556,7 +652,7 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { // A broadcast shuffle creates a uniform value. // TODO: Add support for non-zero index broadcasts. // TODO: Add support for different source vector width. - if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V)) + if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V)) if (ShuffleInst->isZeroEltSplat()) OpInfo = OK_UniformValue; @@ -571,7 +667,7 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { if (auto *CI = dyn_cast<ConstantInt>(Splat)) if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; - } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) { + } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) { OpProps = OP_PowerOf2; for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) @@ -592,28 +688,31 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { } int TargetTransformInfo::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) const { int Cost = TTIImpl->getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, + Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, - Type *SubTp) const { +int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty, + int Index, VectorType *SubTp) const { int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src, const Instruction *I) const { - assert ((I == nullptr || I->getOpcode() == Opcode) && - "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I); +int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, + const Instruction *I) const { + assert((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -626,17 +725,20 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst, return Cost; } -int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - int Cost = TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind); assert(Cost >= 0 && "TTI should not produce 
negative costs!"); return Cost; } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) const { - assert ((I == nullptr || I->getOpcode() == Opcode) && - "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + Type *CondTy, + TTI::TargetCostKind CostKind, + const Instruction *I) const { + assert((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -649,65 +751,59 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, - unsigned AddressSpace, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) const { - assert ((I == nullptr || I->getOpcode() == Opcode) && - "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + assert((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { +int TargetTransformInfo::getMaskedMemoryOpCost( + unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) const { int Cost = - TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - Value *Ptr, bool VariableMask, - unsigned Alignment) const { +int TargetTransformInfo::getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, + Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const { int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment); + Alignment, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, - bool UseMaskForGaps) const { - int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, - UseMaskForCond, - UseMaskForGaps); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - -int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed); + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) const { + int Cost = TTIImpl->getInterleavedMemoryOpCost( + Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, + UseMaskForCond, UseMaskForGaps); assert(Cost >= 0 && "TTI should not produce 
negative costs!"); return Cost; } -int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); +int +TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys) const { - int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -730,18 +826,22 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { return Cost; } -int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const { - int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, + VectorType *Ty, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy, - bool IsPairwiseForm, - bool IsUnsigned) const { +int TargetTransformInfo::getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind) const { int Cost = - TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -765,18 +865,19 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } -Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context, - Value *Length, - unsigned SrcAlign, - unsigned DestAlign) const { - return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign, - DestAlign); +Type *TargetTransformInfo::getMemcpyLoopLoweringType( + LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { + return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, + DestAddrSpace, SrcAlign, DestAlign); } void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, - unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const { + unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign) const { TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign); } @@ -814,13 +915,13 @@ bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { } bool TargetTransformInfo::isLegalToVectorizeLoadChain( - unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); 
} bool TargetTransformInfo::isLegalToVectorizeStoreChain( - unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } @@ -839,8 +940,8 @@ unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } -bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, - Type *Ty, ReductionFlags Flags) const { +bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const { return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } @@ -865,46 +966,21 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, else if (!SI) return false; - SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); + SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) Mask[i] = val; - SmallVector<int, 16> ActualMask = SI->getShuffleMask(); + ArrayRef<int> ActualMask = SI->getShuffleMask(); return Mask == ActualMask; } -namespace { -/// Kind of the reduction data. -enum ReductionKind { - RK_None, /// Not a reduction. - RK_Arithmetic, /// Binary reduction data. - RK_MinMax, /// Min/max reduction data. - RK_UnsignedMinMax, /// Unsigned min/max reduction data. -}; -/// Contains opcode + LHS/RHS parts of the reduction operations. -struct ReductionData { - ReductionData() = delete; - ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) - : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { - assert(Kind != RK_None && "expected binary or min/max reduction only."); - } - unsigned Opcode = 0; - Value *LHS = nullptr; - Value *RHS = nullptr; - ReductionKind Kind = RK_None; - bool hasSameData(ReductionData &RD) const { - return Kind == RD.Kind && Opcode == RD.Opcode; - } -}; -} // namespace - -static Optional<ReductionData> getReductionData(Instruction *I) { +static Optional<TTI::ReductionData> getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) - return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast<SelectInst>(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || @@ -913,20 +989,20 @@ static Optional<ReductionData> getReductionData(Instruction *I) { m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); - return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R); } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); - return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); + return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R); } } return llvm::None; } -static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, - unsigned Level, - unsigned NumLevels) { +static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I, + unsigned Level, + unsigned NumLevels) { // Match one level of pairwise operations. 
// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> @@ -934,24 +1010,24 @@ static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (!I) - return RK_None; + return TTI::RK_None; assert(I->getType()->isVectorTy() && "Expecting a vector type"); - Optional<ReductionData> RD = getReductionData(I); + Optional<TTI::ReductionData> RD = getReductionData(I); if (!RD) - return RK_None; + return TTI::RK_None; ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS); if (!LS && Level) - return RK_None; + return TTI::RK_None; ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS); if (!RS && Level) - return RK_None; + return TTI::RK_None; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) - return RK_None; + return TTI::RK_None; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; @@ -960,7 +1036,7 @@ static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) - return RK_None; + return TTI::RK_None; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { @@ -971,46 +1047,48 @@ static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != RD->RHS) - return RK_None; + return TTI::RK_None; else if (NextLevelOpR && NextLevelOpR != RD->LHS) - return RK_None; + return TTI::RK_None; NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; } else - return RK_None; + return TTI::RK_None; // Check that the next levels binary operation exists and matches with the // current one. if (Level + 1 != NumLevels) { - Optional<ReductionData> NextLevelRD = + if (!isa<Instruction>(NextLevelOp)) + return TTI::RK_None; + Optional<TTI::ReductionData> NextLevelRD = getReductionData(cast<Instruction>(NextLevelOp)); if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) - return RK_None; + return TTI::RK_None; } // Shuffle mask for pairwise operation must match. if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) - return RK_None; + return TTI::RK_None; } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) - return RK_None; + return TTI::RK_None; } else { - return RK_None; + return TTI::RK_None; } if (++Level == NumLevels) return RD->Kind; // Match next level. - return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level, + return matchPairwiseReductionAtLevel(dyn_cast<Instruction>(NextLevelOp), Level, NumLevels); } -static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { +TTI::ReductionKind TTI::matchPairwiseReduction( + const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) - return RK_None; + return TTI::RK_None; // Need to extract the first element. 
ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); @@ -1018,19 +1096,19 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, if (CI) Idx = CI->getZExtValue(); if (Idx != 0) - return RK_None; + return TTI::RK_None; auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); if (!RdxStart) - return RK_None; - Optional<ReductionData> RD = getReductionData(RdxStart); + return TTI::RK_None; + Optional<TTI::ReductionData> RD = getReductionData(RdxStart); if (!RD) - return RK_None; + return TTI::RK_None; - Type *VecTy = RdxStart->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); + auto *VecTy = cast<VectorType>(RdxStart->getType()); + unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) - return RK_None; + return TTI::RK_None; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. @@ -1051,8 +1129,8 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == - RK_None) - return RK_None; + TTI::RK_None) + return TTI::RK_None; Opcode = RD->Opcode; Ty = VecTy; @@ -1071,11 +1149,11 @@ getShuffleAndOtherOprd(Value *L, Value *R) { return std::make_pair(L, S); } -static ReductionKind -matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { +TTI::ReductionKind TTI::matchVectorSplittingReduction( + const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { + if (!EnableReduxCost) - return RK_None; + return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); @@ -1083,19 +1161,19 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, if (CI) Idx = CI->getZExtValue(); if (Idx != 0) - return RK_None; + return TTI::RK_None; auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); if (!RdxStart) - return RK_None; - Optional<ReductionData> RD = getReductionData(RdxStart); + return TTI::RK_None; + Optional<TTI::ReductionData> RD = getReductionData(RdxStart); if (!RD) - return RK_None; + return TTI::RK_None; - Type *VecTy = ReduxRoot->getOperand(0)->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); + auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType()); + unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) - return RK_None; + return TTI::RK_None; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. @@ -1115,10 +1193,10 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) - return RK_None; - Optional<ReductionData> RDLevel = getReductionData(RdxOp); + return TTI::RK_None; + Optional<TTI::ReductionData> RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) - return RK_None; + return TTI::RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; @@ -1127,9 +1205,9 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, // Check the current reduction operation and the shuffle use the same value. 
if (Shuffle == nullptr) - return RK_None; + return TTI::RK_None; if (Shuffle->getOperand(0) != NextRdxOp) - return RK_None; + return TTI::RK_None; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) @@ -1137,9 +1215,9 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); - SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + ArrayRef<int> Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) - return RK_None; + return TTI::RK_None; RdxOp = dyn_cast<Instruction>(NextRdxOp); NumVecElemsRemain /= 2; @@ -1152,15 +1230,13 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, } int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + switch (I->getOpcode()) { case Instruction::GetElementPtr: - return getUserCost(I); - case Instruction::Ret: case Instruction::PHI: - case Instruction::Br: { - return getCFInstrCost(I->getOpcode()); - } + case Instruction::Br: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -1178,48 +1254,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::AShr: case Instruction::And: case Instruction::Or: - case Instruction::Xor: { - TargetTransformInfo::OperandValueKind Op1VK, Op2VK; - TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; - Op1VK = getOperandInfo(I->getOperand(0), Op1VP); - Op2VK = getOperandInfo(I->getOperand(1), Op2VP); - SmallVector<const Value *, 2> Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands, I); - } - case Instruction::FNeg: { - TargetTransformInfo::OperandValueKind Op1VK, Op2VK; - TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; - Op1VK = getOperandInfo(I->getOperand(0), Op1VP); - Op2VK = OK_AnyValue; - Op2VP = OP_None; - SmallVector<const Value *, 2> Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands, I); - } - case Instruction::Select: { - const SelectInst *SI = cast<SelectInst>(I); - Type *CondTy = SI->getCondition()->getType(); - return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); - } + case Instruction::Xor: + case Instruction::FNeg: + case Instruction::Select: case Instruction::ICmp: - case Instruction::FCmp: { - Type *ValTy = I->getOperand(0)->getType(); - return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); - } - case Instruction::Store: { - const StoreInst *SI = cast<StoreInst>(I); - Type *ValTy = SI->getValueOperand()->getType(); - return getMemoryOpCost(I->getOpcode(), ValTy, - MaybeAlign(SI->getAlignment()), - SI->getPointerAddressSpace(), I); - } - case Instruction::Load: { - const LoadInst *LI = cast<LoadInst>(I); - return getMemoryOpCost(I->getOpcode(), I->getType(), - MaybeAlign(LI->getAlignment()), - LI->getPointerAddressSpace(), I); - } + case Instruction::FCmp: + case Instruction::Store: + case Instruction::Load: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -1232,113 +1273,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: - case Instruction::AddrSpaceCast: { - Type *SrcTy = I->getOperand(0)->getType(); - return getCastInstrCost(I->getOpcode(), I->getType(), 
SrcTy, I); - } - case Instruction::ExtractElement: { - const ExtractElementInst * EEI = cast<ExtractElementInst>(I); - ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - - // Try to match a reduction sequence (series of shufflevector and vector - // adds followed by a extractelement). - unsigned ReduxOpCode; - Type *ReduxType; - - switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/false); - case RK_MinMax: - return getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); - case RK_None: - break; - } - - switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { - case RK_Arithmetic: - return getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/true); - case RK_MinMax: - return getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); - case RK_UnsignedMinMax: - return getMinMaxReductionCost( - ReduxType, CmpInst::makeCmpResultType(ReduxType), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); - case RK_None: - break; - } - - return getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); - } - case Instruction::InsertElement: { - const InsertElementInst * IE = cast<InsertElementInst>(I); - ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); - } + case Instruction::AddrSpaceCast: + case Instruction::ExtractElement: + case Instruction::InsertElement: case Instruction::ExtractValue: - return 0; // Model all ExtractValue nodes as free. - case Instruction::ShuffleVector: { - const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); - Type *Ty = Shuffle->getType(); - Type *SrcTy = Shuffle->getOperand(0)->getType(); - - // TODO: Identify and add costs for insert subvector, etc. - int SubIndex; - if (Shuffle->isExtractSubvectorMask(SubIndex)) - return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty); - - if (Shuffle->changesLength()) - return -1; - - if (Shuffle->isIdentity()) - return 0; - - if (Shuffle->isReverse()) - return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr); - - if (Shuffle->isSelect()) - return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr); - - if (Shuffle->isTranspose()) - return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr); - - if (Shuffle->isZeroEltSplat()) - return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr); - - if (Shuffle->isSingleSource()) - return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr); - - return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr); - } + case Instruction::ShuffleVector: case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - SmallVector<Value *, 4> Args(II->arg_operands()); - - FastMathFlags FMF; - if (auto *FPMO = dyn_cast<FPMathOperator>(II)) - FMF = FPMO->getFastMathFlags(); - - return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), - Args, FMF); - } - return -1; + return getUserCost(I, CostKind); default: // We don't have any information on this instruction. 
return -1; diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp index 072d291f3f932..8735d56f907a0 100644 --- a/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -37,10 +38,10 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls, if (isa<BitCastInst>(User)) { findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset, CI, DT); - } else if (auto CI = dyn_cast<CallInst>(User)) { - DevirtCalls.push_back({Offset, CI}); - } else if (auto II = dyn_cast<InvokeInst>(User)) { - DevirtCalls.push_back({Offset, II}); + } else if (auto *CI = dyn_cast<CallInst>(User)) { + DevirtCalls.push_back({Offset, *CI}); + } else if (auto *II = dyn_cast<InvokeInst>(User)) { + DevirtCalls.push_back({Offset, *II}); } else if (HasNonCallUses) { *HasNonCallUses = true; } diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp index a331b95e818b2..0192a216b2f7d 100644 --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -70,6 +70,9 @@ ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) { /// ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { if (ParseString.consume_front("x")) { + // Set VF to 0, to be later adjusted to a value greater than zero + // by looking at the signature of the vector function with + // `getECFromSignature`. VF = 0; IsScalable = true; return ParseRet::OK; @@ -78,6 +81,10 @@ ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { if (ParseString.consumeInteger(10, VF)) return ParseRet::Error; + // The token `0` is invalid for VLEN. + if (VF == 0) + return ParseRet::Error; + IsScalable = false; return ParseRet::OK; } @@ -207,28 +214,6 @@ ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString, return ParseRet::None; } -/// The function looks for the following strings at the beginning of -/// the input string `ParseString`: -/// -/// "u" <number> -/// -/// On success, it removes the parsed parameter from `ParseString`, -/// sets `PKind` to the correspondent enum value, sets `Pos` to -/// <number>, and return success. On a syntax error, it return a -/// parsing error. If nothing is parsed, it returns None. -ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) { - // "u" <Pos> - const char *UniformToken = "u"; - if (ParseString.consume_front(UniformToken)) { - PKind = VFABI::getVFParamKindFromString(UniformToken); - if (ParseString.consumeInteger(10, Pos)) - return ParseRet::Error; - - return ParseRet::OK; - } - return ParseRet::None; -} - /// Looks into the <parameters> part of the mangled name in search /// for valid parameters at the beginning of the string /// `ParseString`.
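To make the stricter VLEN handling in the hunks above concrete, here is a minimal sketch of the three token shapes tryParseVLEN now distinguishes; the driver variables and mangled-name fragments are illustrative assumptions, not code taken from this patch:

unsigned VF = 0;
bool IsScalable = false;
StringRef Scalable = "xv_foo";   // "x" marks a scalable VLEN; VF stays 0 and is fixed up later from the vector signature
assert(tryParseVLEN(Scalable, VF, IsScalable) == ParseRet::OK && IsScalable && VF == 0);
StringRef Fixed = "4v_foo";      // a decimal VLEN is consumed as-is
assert(tryParseVLEN(Fixed, VF, IsScalable) == ParseRet::OK && !IsScalable && VF == 4);
StringRef Zero = "0v_foo";       // the token `0` is now rejected outright
assert(tryParseVLEN(Zero, VF, IsScalable) == ParseRet::Error);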
@@ -245,6 +230,12 @@ ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, return ParseRet::OK; } + if (ParseString.consume_front("u")) { + PKind = VFParamKind::OMP_Uniform; + StepOrPos = 0; + return ParseRet::OK; + } + const ParseRet HasLinearRuntime = tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos); if (HasLinearRuntime != ParseRet::None) return HasLinearRuntime; @@ -255,10 +246,6 @@ ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, const ParseRet HasLinearCompileTime = tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos); if (HasLinearCompileTime != ParseRet::None) return HasLinearCompileTime; - const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos); - if (HasUniform != ParseRet::None) - return HasUniform; - return ParseRet::None; } @@ -287,11 +274,50 @@ ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { return ParseRet::None; } +#ifndef NDEBUG +// Verify the assumption that all vectors in the signature of a vector +// function have the same number of elements. +bool verifyAllVectorsHaveSameWidth(FunctionType *Signature) { + SmallVector<VectorType *, 2> VecTys; + if (auto *RetTy = dyn_cast<VectorType>(Signature->getReturnType())) + VecTys.push_back(RetTy); + for (auto *Ty : Signature->params()) + if (auto *VTy = dyn_cast<VectorType>(Ty)) + VecTys.push_back(VTy); + + if (VecTys.size() <= 1) + return true; + + assert(VecTys.size() > 1 && "Invalid number of elements."); + const ElementCount EC = VecTys[0]->getElementCount(); + return llvm::all_of( + llvm::make_range(VecTys.begin() + 1, VecTys.end()), + [&EC](VectorType *VTy) { return (EC == VTy->getElementCount()); }); +} + +#endif // NDEBUG + +// Extract the VectorizationFactor from a given function signature, +// under the assumption that all vectors have the same number of +// elements, i.e. same ElementCount.Min. +ElementCount getECFromSignature(FunctionType *Signature) { + assert(verifyAllVectorsHaveSameWidth(Signature) && + "Invalid vector signature."); + + if (auto *RetTy = dyn_cast<VectorType>(Signature->getReturnType())) + return RetTy->getElementCount(); + for (auto *Ty : Signature->params()) + if (auto *VTy = dyn_cast<VectorType>(Ty)) + return VTy->getElementCount(); + + return ElementCount(/*Min=*/1, /*Scalable=*/false); +} } // namespace // Format of the ABI name: // _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] -Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) { +Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName, + const Module &M) { const StringRef OriginalName = MangledName; // Assume there is no custom name <redirection>, and therefore the // vector name consists of @@ -402,8 +428,34 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) { assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && "The global predicate must be the last parameter"); + // Adjust the VF for scalable signatures. The EC.Min is not encoded + // in the name of the function, but it is encoded in the IR + // signature of the function. We need to extract this information + // because it is needed by the loop vectorizer, which reasons in + // terms of VectorizationFactor or ElementCount. In particular, we + // need to make sure that the VF field of the VFShape class is never + // set to 0. + if (IsScalable) { + const Function *F = M.getFunction(VectorName); + // The declaration of the function must be present in the module + // to be able to retrieve its signature. + if (!F) + return None; + const ElementCount EC = getECFromSignature(F->getFunctionType()); + VF = EC.Min; + } + + // Sanity checks. + // 1.
We don't accept a zero lanes vectorization factor. + // 2. We don't accept the demangling if the vector function is not + // present in the module. + if (VF == 0) + return None; + if (!M.getFunction(VectorName)) + return None; + const VFShape Shape({VF, IsScalable, Parameters}); - return VFInfo({Shape, ScalarName, VectorName, ISA}); + return VFInfo({Shape, std::string(ScalarName), std::string(VectorName), ISA}); } VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { diff --git a/llvm/lib/Analysis/ValueLattice.cpp b/llvm/lib/Analysis/ValueLattice.cpp index a0115a0eec36c..627166e2409d3 100644 --- a/llvm/lib/Analysis/ValueLattice.cpp +++ b/llvm/lib/Analysis/ValueLattice.cpp @@ -10,13 +10,21 @@ namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) { - if (Val.isUndefined()) - return OS << "undefined"; + if (Val.isUnknown()) + return OS << "unknown"; + if (Val.isUndef()) + return OS << "undef"; if (Val.isOverdefined()) return OS << "overdefined"; if (Val.isNotConstant()) return OS << "notconstant<" << *Val.getNotConstant() << ">"; + + if (Val.isConstantRangeIncludingUndef()) + return OS << "constantrange incl. undef <" + << Val.getConstantRange(true).getLower() << ", " + << Val.getConstantRange(true).getUpper() << ">"; + if (Val.isConstantRange()) return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " << Val.getConstantRange().getUpper() << ">"; diff --git a/llvm/lib/Analysis/ValueLatticeUtils.cpp b/llvm/lib/Analysis/ValueLatticeUtils.cpp index 3f9287e26ce7e..53638c351f722 100644 --- a/llvm/lib/Analysis/ValueLatticeUtils.cpp +++ b/llvm/lib/Analysis/ValueLatticeUtils.cpp @@ -28,16 +28,14 @@ bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) { if (GV->isConstant() || !GV->hasLocalLinkage() || !GV->hasDefinitiveInitializer()) return false; - return !any_of(GV->users(), [&](User *U) { - if (auto *Store = dyn_cast<StoreInst>(U)) { - if (Store->getValueOperand() == GV || Store->isVolatile()) - return true; - } else if (auto *Load = dyn_cast<LoadInst>(U)) { - if (Load->isVolatile()) - return true; - } else { - return true; - } + return all_of(GV->users(), [&](User *U) { + // Currently all users of a global variable have to be non-volatile loads + // or stores and the global cannot be stored itself.
+ if (auto *Store = dyn_cast<StoreInst>(U)) + return Store->getValueOperand() != GV && !Store->isVolatile(); + if (auto *Load = dyn_cast<LoadInst>(U)) + return !Load->isVolatile(); + return false; }); } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index ad6765e2514b4..43caaa62c2ec5 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -34,7 +35,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -163,8 +163,61 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { return nullptr; } -static void computeKnownBits(const Value *V, KnownBits &Known, - unsigned Depth, const Query &Q); +static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf, + const APInt &DemandedElts, + APInt &DemandedLHS, APInt &DemandedRHS) { + // The length of scalable vectors is unknown at compile time, thus we + // cannot check their values + if (isa<ScalableVectorType>(Shuf->getType())) + return false; + + int NumElts = + cast<VectorType>(Shuf->getOperand(0)->getType())->getNumElements(); + int NumMaskElts = Shuf->getType()->getNumElements(); + DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts); + if (DemandedElts.isNullValue()) + return true; + // Simple case of a shuffle with zeroinitializer. + if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) { + DemandedLHS.setBit(0); + return true; + } + for (int i = 0; i != NumMaskElts; ++i) { + if (!DemandedElts[i]) + continue; + int M = Shuf->getMaskValue(i); + assert(M < (NumElts * 2) && "Invalid shuffle mask constant"); + + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (M == -1) + return false; + if (M < NumElts) + DemandedLHS.setBit(M % NumElts); + else + DemandedRHS.setBit(M % NumElts); + } + + return true; +} + +static void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, unsigned Depth, const Query &Q); + +static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { + // FIXME: We currently have no way to represent the DemandedElts of a scalable + // vector + if (isa<ScalableVectorType>(V->getType())) { + Known.resetAll(); + return; + } + + auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); + APInt DemandedElts = + FVTy ? 
APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1); + computeKnownBits(V, DemandedElts, Known, Depth, Q); +} void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, @@ -175,6 +228,18 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); } +void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { + ::computeKnownBits(V, DemandedElts, Known, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); +} + +static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const Query &Q); + static KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q); @@ -188,6 +253,17 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); } +KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, + const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, + bool UseInstrInfo) { + return ::computeKnownBits( + V, DemandedElts, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); +} + bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, @@ -235,6 +311,9 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); } +static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, + unsigned Depth, const Query &Q); + static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, @@ -295,8 +374,21 @@ bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); } +static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const Query &Q); + static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, - const Query &Q); + const Query &Q) { + // FIXME: We currently have no way to represent the DemandedElts of a scalable + // vector + if (isa<ScalableVectorType>(V->getType())) + return 1; + + auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); + APInt DemandedElts = + FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1); + return ComputeNumSignBits(V, DemandedElts, Depth, Q); +} unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, @@ -307,26 +399,27 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, - bool NSW, + bool NSW, const APInt &DemandedElts, KnownBits &KnownOut, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownOut.getBitWidth(); + computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q); - // If an initial sequence of bits in the result is not needed, the - // corresponding bits in the operands are not needed. 
- KnownBits LHSKnown(BitWidth); - computeKnownBits(Op0, LHSKnown, Depth + 1, Q); - computeKnownBits(Op1, Known2, Depth + 1, Q); + // If one operand is unknown and we have no nowrap information, + // the result will be unknown independently of the second operand. + if (KnownOut.isUnknown() && !NSW) + return; - KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2); + computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); + KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut); } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, - KnownBits &Known, KnownBits &Known2, - unsigned Depth, const Query &Q) { + const APInt &DemandedElts, KnownBits &Known, + KnownBits &Known2, unsigned Depth, + const Query &Q) { unsigned BitWidth = Known.getBitWidth(); - computeKnownBits(Op1, Known, Depth + 1, Q); - computeKnownBits(Op0, Known2, Depth + 1, Q); + computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q); + computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -535,6 +628,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, // feeding the assume is trivially true, thus causing the removal of // the assume). + if (Inv->getParent() == CxtI->getParent()) { + // If Inv and CtxI are in the same block, check if the assume (Inv) is first + // in the BB. + if (Inv->comesBefore(CxtI)) + return true; + + // Don't let an assume affect itself - this would cause the problems + // `isEphemeralValueOf` is trying to prevent, and it would also make + // the loop below go out of bounds. + if (Inv == CxtI) + return false; + + // The context comes first, but they're both in the same block. + // Make sure there is nothing in between that might interrupt + // the control flow, not even CxtI itself. + for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) + return false; + + return !isEphemeralValueOf(Inv, CxtI); + } + + // Inv and CxtI are in different blocks. if (DT) { if (DT->dominates(Inv, CxtI)) return true; @@ -543,37 +659,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return true; } - // With or without a DT, the only remaining case we will check is if the - // instructions are in the same BB. Give up if that is not the case. - if (Inv->getParent() != CxtI->getParent()) - return false; - - // If we have a dom tree, then we now know that the assume doesn't dominate - // the other instruction. If we don't have a dom tree then we can check if - // the assume is first in the BB. - if (!DT) { - // Search forward from the assume until we reach the context (or the end - // of the block); the common case is that the assume will come first. - for (auto I = std::next(BasicBlock::const_iterator(Inv)), - IE = Inv->getParent()->end(); I != IE; ++I) - if (&*I == CxtI) - return true; - } - - // Don't let an assume affect itself - this would cause the problems - // `isEphemeralValueOf` is trying to prevent, and it would also make - // the loop below go out of bounds. - if (Inv == CxtI) - return false; - - // The context comes first, but they're both in the same block. - // Make sure there is nothing in between that might interrupt - // the control flow, not even CxtI itself. 
- for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I) - if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) - return false; - - return !isEphemeralValueOf(Inv, CxtI); + return false; } static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { @@ -592,10 +678,6 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { CmpInst::Predicate Pred; if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS)))) return false; - // Canonicalize 'v' to be on the LHS of the comparison. - if (Cmp->getOperand(1) != RHS) - Pred = CmpInst::getSwappedPredicate(Pred); - // assume(v u> y) -> assume(v != 0) if (Pred == ICmpInst::ICMP_UGT) return true; @@ -615,6 +697,16 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth())); }; + if (Q.CxtI && V->getType()->isPointerTy()) { + SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull}; + if (!NullPointerIsDefined(Q.CxtI->getFunction(), + V->getType()->getPointerAddressSpace())) + AttrKinds.push_back(Attribute::Dereferenceable); + + if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC)) + return true; + } + for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { if (!AssumeVH) continue; @@ -693,6 +785,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (!Cmp) continue; + // Note that ptrtoint may change the bitwidth. Value *A, *B; auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); @@ -705,18 +798,18 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v = a) if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); Known.Zero |= RHSKnown.Zero; Known.One |= RHSKnown.One; // assume(v & b = a) } else if (match(Cmp, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits MaskKnown(BitWidth); - computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits MaskKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. @@ -726,10 +819,10 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits MaskKnown(BitWidth); - computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits MaskKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. 
@@ -739,10 +832,10 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits BKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. @@ -752,10 +845,10 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits BKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. @@ -765,10 +858,10 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits BKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, @@ -781,10 +874,10 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + KnownBits BKnown = + computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are @@ -797,8 +890,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); + // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. 
RHSKnown.Zero.lshrInPlace(C); @@ -809,8 +903,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. RHSKnown.One.lshrInPlace(C); @@ -821,8 +915,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. Known.Zero |= RHSKnown.Zero << C; @@ -831,8 +925,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. Known.Zero |= RHSKnown.One << C; @@ -843,8 +937,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v >=_s c) where c is non-negative if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth); if (RHSKnown.isNonNegative()) { // We know that the sign bit is zero. @@ -856,8 +950,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v >_s c) where c is at least -1. if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth); if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { // We know that the sign bit is zero. @@ -869,8 +963,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v <=_s c) where c is negative if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth); if (RHSKnown.isNegative()) { // We know that the sign bit is one. 
@@ -882,8 +976,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v <_s c) where c is non-positive if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); if (RHSKnown.isZero() || RHSKnown.isNegative()) { // We know that the sign bit is one. @@ -895,8 +989,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v <=_u c) if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // Whatever high bits in c are zero are known to be zero. Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); @@ -906,8 +1000,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // assume(v <_u c) if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits RHSKnown = + computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth); // If the RHS is known zero, then this assumption must be wrong (nothing // is unsigned less than zero). Signal a conflict and get out of here. @@ -957,16 +1051,17 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, /// amount. The results from calling KZF and KOF are conservatively combined for /// all permitted shift amounts. static void computeKnownBitsFromShiftOperator( - const Operator *I, KnownBits &Known, KnownBits &Known2, - unsigned Depth, const Query &Q, + const Operator *I, const APInt &DemandedElts, KnownBits &Known, + KnownBits &Known2, unsigned Depth, const Query &Q, function_ref<APInt(const APInt &, unsigned)> KZF, function_ref<APInt(const APInt &, unsigned)> KOF) { unsigned BitWidth = Known.getBitWidth(); - if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); + computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); + if (Known.isConstant()) { + unsigned ShiftAmt = Known.getConstant().getLimitedValue(BitWidth - 1); - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); Known.Zero = KZF(Known.Zero, ShiftAmt); Known.One = KOF(Known.One, ShiftAmt); // If the known bits conflict, this must be an overflowing left shift, so @@ -978,11 +1073,10 @@ static void computeKnownBitsFromShiftOperator( return; } - computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - // If the shift amount could be greater than or equal to the bit-width of the // LHS, the value could be poison, but bail out because the check below is - // expensive. TODO: Should we just carry on? + // expensive. + // TODO: Should we just carry on? if (Known.getMaxValue().uge(BitWidth)) { Known.resetAll(); return; @@ -1006,12 +1100,13 @@ static void computeKnownBitsFromShiftOperator( // Early exit if we can't constrain any well-defined shift amount. 
if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { - ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); + ShifterOperandIsNonZero = + isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -1028,7 +1123,7 @@ static void computeKnownBitsFromShiftOperator( if (ShiftAmt == 0) { if (!ShifterOperandIsNonZero.hasValue()) ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), Depth + 1, Q); + isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q); if (*ShifterOperandIsNonZero) continue; } @@ -1043,11 +1138,13 @@ static void computeKnownBitsFromShiftOperator( Known.setAllZero(); } -static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, - unsigned Depth, const Query &Q) { +static void computeKnownBitsFromOperator(const Operator *I, + const APInt &DemandedElts, + KnownBits &Known, unsigned Depth, + const Query &Q) { unsigned BitWidth = Known.getBitWidth(); - KnownBits Known2(Known); + KnownBits Known2(BitWidth); switch (I->getOpcode()) { default: break; case Instruction::Load: @@ -1057,13 +1154,10 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - Known.Zero |= Known2.Zero; + Known &= Known2; // and(x, add (x, -1)) is a common idiom that always clears the low bit; // here we handle the more general case of adding any odd number by @@ -1074,36 +1168,28 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, if (!Known.Zero[0] && !Known.One[0] && match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { Known2.resetAll(); - computeKnownBits(Y, Known2, Depth + 1, Q); + computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q); if (Known2.countMinTrailingOnes() > 0) Known.Zero.setBit(0); } break; } case Instruction::Or: - computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. - Known.One |= Known2.One; + Known |= Known2; break; - case Instruction::Xor: { - computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + case Instruction::Xor: + computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - // Output known-0 bits are known if clear or set in both the LHS & RHS. 
- APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - Known.Zero = std::move(KnownZeroOut); + Known ^= Known2; break; - } case Instruction::Mul: { bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known, - Known2, Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, + Known, Known2, Depth, Q); break; } case Instruction::UDiv: { @@ -1207,9 +1293,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - Known = Known.zextOrTrunc(SrcBitWidth, false); + Known = Known.anyextOrTrunc(SrcBitWidth); computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zextOrTrunc(BitWidth); break; } case Instruction::BitCast: { @@ -1254,7 +1340,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, return KOResult; }; - computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); + computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, + KZF, KOF); break; } case Instruction::LShr: { @@ -1270,7 +1357,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, return KnownOne.lshr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); + computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, + KZF, KOF); break; } case Instruction::AShr: { @@ -1283,19 +1371,20 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, return KnownOne.ashr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); + computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, + KZF, KOF); break; } case Instruction::Sub: { bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - Known, Known2, Depth, Q); + DemandedElts, Known, Known2, Depth, Q); break; } case Instruction::Add: { bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - Known, Known2, Depth, Q); + DemandedElts, Known, Known2, Depth, Q); break; } case Instruction::SRem: @@ -1355,17 +1444,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setHighBits(Leaders); break; } - - case Instruction::Alloca: { - const AllocaInst *AI = cast<AllocaInst>(I); - unsigned Align = AI->getAlignment(); - if (Align == 0) - Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); - - if (Align > 0) - Known.Zero.setLowBits(countTrailingZeros(Align)); + case Instruction::Alloca: + Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign())); break; - } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. @@ -1375,6 +1456,10 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + // TrailZ can only become smaller, short-circuit if we hit zero. 
+ if (TrailZ == 0) + break; + Value *Index = I->getOperand(i); if (StructType *STy = GTI.getStructTypeOrNull()) { // Handle struct member offset arithmetic. @@ -1400,7 +1485,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); - uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy); + uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize(); LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0); computeKnownBits(Index, LocalKnown, Depth + 1, Q); TrailZ = std::min(TrailZ, @@ -1457,7 +1542,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, computeKnownBits(R, Known2, Depth + 1, RecQ); // We need to take the minimum number of known bits - KnownBits Known3(Known); + KnownBits Known3(BitWidth); RecQ.CxtI = LInst; computeKnownBits(L, Known3, Depth + 1, RecQ); @@ -1549,7 +1634,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, if (MDNode *MD = Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, Known); - if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { + if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { computeKnownBits(RV, Known2, Depth + 1, Q); Known.Zero |= Known2.Zero; Known.One |= Known2.One; @@ -1558,12 +1643,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, switch (II->getIntrinsicID()) { default: break; case Intrinsic::bitreverse: - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); Known.Zero |= Known2.Zero.reverseBits(); Known.One |= Known2.One.reverseBits(); break; case Intrinsic::bswap: - computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); Known.Zero |= Known2.Zero.byteSwap(); Known.One |= Known2.One.byteSwap(); break; @@ -1611,7 +1696,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, if (II->getIntrinsicID() == Intrinsic::fshr) ShiftAmt = BitWidth - ShiftAmt; - KnownBits Known3(Known); + KnownBits Known3(BitWidth); computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); @@ -1658,13 +1743,85 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, } } break; - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. But at least we might find information - // valid for all elements of the vector (for example if vector is sign - // extended, shifted, etc). - computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + case Instruction::ShuffleVector: { + auto *Shuf = dyn_cast<ShuffleVectorInst>(I); + // FIXME: Do we need to handle ConstantExpr involving shufflevectors? + if (!Shuf) { + Known.resetAll(); + return; + } + // For undef elements, we don't know anything about the common state of + // the shuffle result. + APInt DemandedLHS, DemandedRHS; + if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { + Known.resetAll(); + return; + } + Known.One.setAllBits(); + Known.Zero.setAllBits(); + if (!!DemandedLHS) { + const Value *LHS = Shuf->getOperand(0); + computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); + // If we don't know any bits, early out. 
+ if (Known.isUnknown()) + break; + } + if (!!DemandedRHS) { + const Value *RHS = Shuf->getOperand(1); + computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + break; + } + case Instruction::InsertElement: { + const Value *Vec = I->getOperand(0); + const Value *Elt = I->getOperand(1); + auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); + // Early out if the index is non-constant or out-of-range. + unsigned NumElts = DemandedElts.getBitWidth(); + if (!CIdx || CIdx->getValue().uge(NumElts)) { + Known.resetAll(); + return; + } + Known.One.setAllBits(); + Known.Zero.setAllBits(); + unsigned EltIdx = CIdx->getZExtValue(); + // Do we demand the inserted element? + if (DemandedElts[EltIdx]) { + computeKnownBits(Elt, Known, Depth + 1, Q); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + } + // We don't need the base vector element that has been inserted. + APInt DemandedVecElts = DemandedElts; + DemandedVecElts.clearBit(EltIdx); + if (!!DemandedVecElts) { + computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } break; + } + case Instruction::ExtractElement: { + // Look through extract element. If the index is non-constant or + // out-of-range demand all elements, otherwise just the extracted element. + const Value *Vec = I->getOperand(0); + const Value *Idx = I->getOperand(1); + auto *CIdx = dyn_cast<ConstantInt>(Idx); + if (isa<ScalableVectorType>(Vec->getType())) { + // FIXME: there's probably *something* we can do with scalable vectors + Known.resetAll(); + break; + } + unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); + APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); + if (CIdx && CIdx->getValue().ult(NumElts)) + DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); + computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); + break; + } case Instruction::ExtractValue: if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { const ExtractValueInst *EVI = cast<ExtractValueInst>(I); @@ -1675,28 +1832,38 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, Known, Known2, - Depth, Q); + II->getArgOperand(1), false, DemandedElts, + Known, Known2, Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, Known, Known2, - Depth, Q); + II->getArgOperand(1), false, DemandedElts, + Known, Known2, Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, - Known, Known2, Depth, Q); + DemandedElts, Known, Known2, Depth, Q); break; } } } + break; } } /// Determine which bits of V are known to be either zero or one and return /// them. +KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const Query &Q) { + KnownBits Known(getBitWidth(V->getType(), Q.DL)); + computeKnownBits(V, DemandedElts, Known, Depth, Q); + return Known; +} + +/// Determine which bits of V are known to be either zero or one and return +/// them. 
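// Sketch of how the extractelement case above narrows the demanded-elements
// mask: a constant in-range index demands exactly one source lane, anything
// else conservatively demands them all. Standalone C++ with a plain bitmask
// instead of APInt; names are illustrative.
#include <cassert>
#include <cstdint>
#include <optional>

// Returns one bit per demanded lane of the source vector.
uint64_t demandedVecElts(unsigned NumElts, std::optional<uint64_t> ConstIdx) {
  uint64_t AllOnes = (NumElts >= 64) ? ~0ull : ((1ull << NumElts) - 1);
  if (ConstIdx && *ConstIdx < NumElts)
    return 1ull << *ConstIdx;   // only the extracted lane matters
  return AllOnes;               // variable or out-of-range index: demand all
}

int main() {
  assert(demandedVecElts(4, 2) == 0b0100);         // extract lane 2 of <4 x i32>
  assert(demandedVecElts(4, std::nullopt) == 0xF); // non-constant index
  assert(demandedVecElts(4, 7) == 0xF);            // out-of-range index
  return 0;
}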
KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { KnownBits Known(getBitWidth(V->getType(), Q.DL)); computeKnownBits(V, Known, Depth, Q); @@ -1717,23 +1884,44 @@ KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { /// type, and vectors of integers. In the case /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true -/// for all of the elements in the vector. -void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, - const Query &Q) { +/// for all of the demanded elements in the vector specified by DemandedElts. +void computeKnownBits(const Value *V, const APInt &DemandedElts, + KnownBits &Known, unsigned Depth, const Query &Q) { + if (!DemandedElts || isa<ScalableVectorType>(V->getType())) { + // No demanded elts or V is a scalable vector, better to assume we don't + // know anything. + Known.resetAll(); + return; + } + assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); + +#ifndef NDEBUG + Type *Ty = V->getType(); unsigned BitWidth = Known.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy(BitWidth) || - V->getType()->isPtrOrPtrVectorTy()) && + assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - Type *ScalarTy = V->getType()->getScalarType(); - unsigned ExpectedWidth = ScalarTy->isPointerTy() ? - Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); - assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); - (void)BitWidth; - (void)ExpectedWidth; + if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { + assert( + FVTy->getNumElements() == DemandedElts.getBitWidth() && + "DemandedElt width should equal the fixed vector number of elements"); + } else { + assert(DemandedElts == APInt(1, 1) && + "DemandedElt width should be 1 for scalars"); + } + + Type *ScalarTy = Ty->getScalarType(); + if (ScalarTy->isPointerTy()) { + assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && + "V and Known should have same BitWidth"); + } else { + assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && + "V and Known should have same BitWidth"); + } +#endif const APInt *C; if (match(V, m_APInt(C))) { @@ -1749,12 +1937,14 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, } // Handle a constant vector by taking the intersection of the known bits of // each element. - if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { - // We know that CDS must be a vector of integers. Take the intersection of + if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { + // We know that CDV must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - APInt Elt = CDS->getElementAsAPInt(i); + for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + APInt Elt = CDV->getElementAsAPInt(i); Known.Zero &= ~Elt; Known.One &= Elt; } @@ -1766,6 +1956,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // each element. 
Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); if (!ElementCI) { @@ -1804,13 +1996,12 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, } if (const Operator *I = dyn_cast<Operator>(V)) - computeKnownBitsFromOperator(I, Known, Depth, Q); + computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); // Aligned pointers have trailing zeros - refine Known.Zero set - if (V->getType()->isPointerTy()) { - const MaybeAlign Align = V->getPointerAlignment(Q.DL); - if (Align) - Known.Zero.setLowBits(countTrailingZeros(Align->value())); + if (isa<PointerType>(V->getType())) { + Align Alignment = V->getPointerAlignment(Q.DL); + Known.Zero.setLowBits(countTrailingZeros(Alignment.value())); } // computeKnownBitsFromAssume strictly refines Known. @@ -1960,7 +2151,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, } // If we have a zero-sized type, the index doesn't matter. Keep looping. - if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0) + if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).getKnownMinSize() == 0) continue; // Fast path the constant operand case both for efficiency and so we don't @@ -2004,11 +2195,11 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, // If the value is used as an argument to a call or invoke, then argument // attributes may provide an answer about null-ness. - if (auto CS = ImmutableCallSite(U)) - if (auto *CalledFunc = CS.getCalledFunction()) + if (const auto *CB = dyn_cast<CallBase>(U)) + if (auto *CalledFunc = CB->getCalledFunction()) for (const Argument &Arg : CalledFunc->args()) - if (CS.getArgOperand(Arg.getArgNo()) == V && - Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) + if (CB->getArgOperand(Arg.getArgNo()) == V && + Arg.hasNonNullAttr() && DT->dominates(CB, CtxI)) return true; // If the value is used as a load/store, then the pointer must be non null. @@ -2088,12 +2279,18 @@ static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) } /// Return true if the given value is known to be non-zero when defined. For -/// vectors, return true if every element is known to be non-zero when +/// vectors, return true if every demanded element is known to be non-zero when /// defined. For pointers, if the context instruction and dominator tree are /// specified, perform context-sensitive analysis and return true if the /// pointer couldn't possibly be null at the specified instruction. /// Supports values with integer or pointer type and vectors of integers. -bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { +bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, + const Query &Q) { + // FIXME: We currently have no way to represent the DemandedElts of a scalable + // vector + if (isa<ScalableVectorType>(V->getType())) + return false; + if (auto *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return false; @@ -2112,8 +2309,10 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // For constant vectors, check that all elements are undefined or known // non-zero to determine that the whole vector is known non-zero. 
- if (auto *VecTy = dyn_cast<VectorType>(C->getType())) { + if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { + if (!DemandedElts[i]) + continue; Constant *Elt = C->getAggregateElement(i); if (!Elt || Elt->isNullValue()) return false; @@ -2161,7 +2360,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast<Argument>(V)) - if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr()) + if (A->hasPassPointeeByValueAttr() || A->hasNonNullAttr()) return true; // A Load tagged with nonnull metadata is never null. @@ -2214,7 +2413,8 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) - return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q); + return isKnownNonZero(X, DemandedElts, Depth, Q) || + isKnownNonZero(Y, DemandedElts, Depth, Q); // ext X != 0 if X != 0. if (isa<SExtInst>(V) || isa<ZExtInst>(V)) @@ -2229,7 +2429,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { return isKnownNonZero(X, Depth, Q); KnownBits Known(BitWidth); - computeKnownBits(X, Known, Depth, Q); + computeKnownBits(X, DemandedElts, Known, Depth, Q); if (Known.One[0]) return true; } @@ -2241,7 +2441,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (BO->isExact()) return isKnownNonZero(X, Depth, Q); - KnownBits Known = computeKnownBits(X, Depth, Q); + KnownBits Known = computeKnownBits(X, DemandedElts, Depth, Q); if (Known.isNegative()) return true; @@ -2255,22 +2455,23 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { return true; // Are all the bits to be shifted out known zero? if (Known.countMinTrailingZeros() >= ShiftVal) - return isKnownNonZero(X, Depth, Q); + return isKnownNonZero(X, DemandedElts, Depth, Q); } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { - return isKnownNonZero(X, Depth, Q); + return isKnownNonZero(X, DemandedElts, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { - KnownBits XKnown = computeKnownBits(X, Depth, Q); - KnownBits YKnown = computeKnownBits(Y, Depth, Q); + KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); + KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. if (XKnown.isNonNegative() && YKnown.isNonNegative()) - if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q)) + if (isKnownNonZero(X, DemandedElts, Depth, Q) || + isKnownNonZero(Y, DemandedElts, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -2301,13 +2502,14 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) && - isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q)) + isKnownNonZero(X, DemandedElts, Depth, Q) && + isKnownNonZero(Y, DemandedElts, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. 
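// Quick standalone check of the rule used in the `X + Y` case above: if both
// addends are non-negative as signed values and at least one is non-zero,
// the sum cannot wrap around to zero. Exhaustive over 8-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (int X = 0; X <= 127; ++X)
    for (int Y = 0; Y <= 127; ++Y)
      if (X != 0 || Y != 0)
        assert(static_cast<uint8_t>(X + Y) != 0); // sum stays below 256
  return 0;
}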
else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { - if (isKnownNonZero(SI->getTrueValue(), Depth, Q) && - isKnownNonZero(SI->getFalseValue(), Depth, Q)) + if (isKnownNonZero(SI->getTrueValue(), DemandedElts, Depth, Q) && + isKnownNonZero(SI->getFalseValue(), DemandedElts, Depth, Q)) return true; } // PHI @@ -2337,12 +2539,35 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (AllNonZeroConstants) return true; } + // ExtractElement + else if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) { + const Value *Vec = EEI->getVectorOperand(); + const Value *Idx = EEI->getIndexOperand(); + auto *CIdx = dyn_cast<ConstantInt>(Idx); + unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); + APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); + if (CIdx && CIdx->getValue().ult(NumElts)) + DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); + return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + } KnownBits Known(BitWidth); - computeKnownBits(V, Known, Depth, Q); + computeKnownBits(V, DemandedElts, Known, Depth, Q); return Known.One != 0; } +bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) { + // FIXME: We currently have no way to represent the DemandedElts of a scalable + // vector + if (isa<ScalableVectorType>(V->getType())) + return false; + + auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); + APInt DemandedElts = + FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1); + return isKnownNonZero(V, DemandedElts, Depth, Q); +} + /// Return true if V2 == V1 + X, where X is known non-zero. static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); @@ -2433,14 +2658,17 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, /// or if any element was not analyzed; otherwise, return the count for the /// element with the minimum number of sign bits. static unsigned computeNumSignBitsVectorConstant(const Value *V, + const APInt &DemandedElts, unsigned TyBits) { const auto *CV = dyn_cast<Constant>(V); - if (!CV || !CV->getType()->isVectorTy()) + if (!CV || !isa<FixedVectorType>(CV->getType())) return 0; unsigned MinSignBits = TyBits; - unsigned NumElts = CV->getType()->getVectorNumElements(); + unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; // If we find a non-ConstantInt, bail out. auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); if (!Elt) @@ -2452,12 +2680,13 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V, return MinSignBits; } -static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, - const Query &Q); +static unsigned ComputeNumSignBitsImpl(const Value *V, + const APInt &DemandedElts, + unsigned Depth, const Query &Q); -static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, - const Query &Q) { - unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q); +static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const Query &Q) { + unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); assert(Result > 0 && "At least one sign bit needs to be present!"); return Result; } @@ -2467,16 +2696,36 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, /// (itself), but other cases can give us information. 
For example, immediately /// after an "ashr X, 2", we know that the top 3 bits are all equal to each /// other, so we return 3. For vectors, return the number of sign bits for the -/// vector element with the minimum number of known sign bits. -static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, - const Query &Q) { +/// vector element with the minimum number of known sign bits of the demanded +/// elements in the vector specified by DemandedElts. +static unsigned ComputeNumSignBitsImpl(const Value *V, + const APInt &DemandedElts, + unsigned Depth, const Query &Q) { + Type *Ty = V->getType(); + + // FIXME: We currently have no way to represent the DemandedElts of a scalable + // vector + if (isa<ScalableVectorType>(Ty)) + return 1; + +#ifndef NDEBUG assert(Depth <= MaxDepth && "Limit Search Depth"); + if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { + assert( + FVTy->getNumElements() == DemandedElts.getBitWidth() && + "DemandedElt width should equal the fixed vector number of elements"); + } else { + assert(DemandedElts == APInt(1, 1) && + "DemandedElt width should be 1 for scalars"); + } +#endif + // We return the minimum number of sign bits that are guaranteed to be present // in V, so for undef we have to conservatively return 1. We don't have the // same behavior for poison though -- that's a FIXME today. - Type *ScalarTy = V->getType()->getScalarType(); + Type *ScalarTy = Ty->getScalarType(); unsigned TyBits = ScalarTy->isPointerTy() ? Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); @@ -2702,40 +2951,37 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); case Instruction::ShuffleVector: { - // TODO: This is copied almost directly from the SelectionDAG version of - // ComputeNumSignBits. It would be better if we could share common - // code. If not, make sure that changes are translated to the DAG. - // Collect the minimum number of sign bits that are shared by every vector // element referenced by the shuffle. - auto *Shuf = cast<ShuffleVectorInst>(U); - int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); - int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - for (int i = 0; i != NumMaskElts; ++i) { - int M = Shuf->getMaskValue(i); - assert(M < NumElts * 2 && "Invalid shuffle mask constant"); - // For undef elements, we don't know anything about the common state of - // the shuffle result. - if (M == -1) - return 1; - if (M < NumElts) - DemandedLHS.setBit(M % NumElts); - else - DemandedRHS.setBit(M % NumElts); + auto *Shuf = dyn_cast<ShuffleVectorInst>(U); + if (!Shuf) { + // FIXME: Add support for shufflevector constant expressions. + return 1; } + APInt DemandedLHS, DemandedRHS; + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) + return 1; Tmp = std::numeric_limits<unsigned>::max(); - if (!!DemandedLHS) - Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); + if (!!DemandedLHS) { + const Value *LHS = Shuf->getOperand(0); + Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); + } + // If we don't know anything, early out and try computeKnownBits + // fall-back. 
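// Standalone sketch of the quantity computeNumSignBitsVectorConstant gathers
// above: for each demanded constant element, count how many of the top bits
// are copies of the sign bit, and keep the minimum. Plain C++ types; names
// are illustrative.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

unsigned numSignBits(int32_t V) {
  unsigned N = 0;
  int32_t Sign = V < 0 ? 1 : 0;
  for (int Bit = 31; Bit >= 0 && ((V >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N; // always >= 1, since bit 31 itself is the sign bit
}

unsigned minSignBitsOfConstVector(const std::vector<int32_t> &Elts) {
  unsigned Min = 32;
  for (int32_t E : Elts)
    Min = std::min(Min, numSignBits(E));
  return Min;
}

int main() {
  assert(numSignBits(-1) == 32); // all bits are copies of the sign bit
  assert(numSignBits(3) == 30);  // 0b000...011
  assert(minSignBitsOfConstVector({-1, 3, -4}) == 30);
  return 0;
}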
+ if (Tmp == 1) + break; if (!!DemandedRHS) { - Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); + const Value *RHS = Shuf->getOperand(1); + Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); Tmp = std::min(Tmp, Tmp2); } // If we don't know anything, early out and try computeKnownBits // fall-back. if (Tmp == 1) break; - assert(Tmp <= V->getType()->getScalarSizeInBits() && + assert(Tmp <= Ty->getScalarSizeInBits() && "Failed to determine minimum sign bits"); return Tmp; } @@ -2747,11 +2993,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we can examine all elements of a vector constant successfully, we're // done (we can't do any better than that). If not, keep trying. - if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) + if (unsigned VecSignBits = + computeNumSignBitsVectorConstant(V, DemandedElts, TyBits)) return VecSignBits; KnownBits Known(TyBits); - computeKnownBits(V, Known, Depth, Q); + computeKnownBits(V, DemandedElts, Known, Depth, Q); // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. @@ -2877,30 +3124,23 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return false; } -Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, +Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI) { - const Function *F = ICS.getCalledFunction(); + const Function *F = CB.getCalledFunction(); if (!F) return Intrinsic::not_intrinsic; if (F->isIntrinsic()) return F->getIntrinsicID(); - if (!TLI) - return Intrinsic::not_intrinsic; - + // We are going to infer semantics of a library function based on mapping it + // to an LLVM intrinsic. Check that the library function is available from + // this callbase and in this environment. LibFunc Func; - // We're going to make assumptions on the semantics of the functions, check - // that the target knows that it's available in this environment and it does - // not have local linkage. - if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func)) + if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || + !CB.onlyReadsMemory()) return Intrinsic::not_intrinsic; - if (!ICS.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - // Otherwise check if we have a call to a function that can be turned into a - // vector intrinsic. switch (Func) { default: break; @@ -2972,6 +3212,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, case LibFunc_roundf: case LibFunc_roundl: return Intrinsic::round; + case LibFunc_roundeven: + case LibFunc_roundevenf: + case LibFunc_roundevenl: + return Intrinsic::roundeven; case LibFunc_pow: case LibFunc_powf: case LibFunc_powl: @@ -2987,6 +3231,9 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, /// Return true if we can prove that the specified FP value is never equal to /// -0.0. +/// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee +/// that a value is not -0.0. It only guarantees that -0.0 may be treated +/// the same as +0.0 in floating-point ops. /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! @@ -3003,11 +3250,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (!Op) return false; - // Check if the nsz fast-math flag is set. 
- if (auto *FPO = dyn_cast<FPMathOperator>(Op)) - if (FPO->hasNoSignedZeros()) - return true; - // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. if (match(Op, m_FAdd(m_Value(), m_PosZeroFP()))) return true; @@ -3017,7 +3259,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, return true; if (auto *Call = dyn_cast<CallInst>(Op)) { - Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI); + Intrinsic::ID IID = getIntrinsicForCallSite(*Call, TLI); switch (IID) { default: break; @@ -3053,8 +3295,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, // Handle vector of constants. if (auto *CV = dyn_cast<Constant>(V)) { - if (CV->getType()->isVectorTy()) { - unsigned NumElts = CV->getType()->getVectorNumElements(); + if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { + unsigned NumElts = CVFVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); if (!CFP) @@ -3083,14 +3325,15 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, case Instruction::UIToFP: return true; case Instruction::FMul: - // x*x is always non-negative or a NaN. + case Instruction::FDiv: + // X * X is always non-negative or a NaN. + // X / X is always exactly 1.0 or a NaN. if (I->getOperand(0) == I->getOperand(1) && (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) return true; LLVM_FALLTHROUGH; case Instruction::FAdd: - case Instruction::FDiv: case Instruction::FRem: return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1) && @@ -3114,17 +3357,32 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, Depth + 1); case Instruction::Call: const auto *CI = cast<CallInst>(I); - Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); + Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); switch (IID) { default: break; - case Intrinsic::maxnum: - return (isKnownNeverNaN(I->getOperand(0), TLI) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, - SignBitOnly, Depth + 1)) || - (isKnownNeverNaN(I->getOperand(1), TLI) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, - SignBitOnly, Depth + 1)); + case Intrinsic::maxnum: { + Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); + auto isPositiveNum = [&](Value *V) { + if (SignBitOnly) { + // With SignBitOnly, this is tricky because the result of + // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is + // a constant strictly greater than 0.0. + const APFloat *C; + return match(V, m_APFloat(C)) && + *C > APFloat::getZero(C->getSemantics()); + } + + // -0.0 compares equal to 0.0, so if this operand is at least -0.0, + // maxnum can't be ordered-less-than-zero. + return isKnownNeverNaN(V, TLI) && + cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth + 1); + }; + + // TODO: This could be improved. We could also check that neither operand + // has its sign bit set (and at least 1 is not-NAN?). + return isPositiveNum(V0) || isPositiveNum(V1); + } case Intrinsic::maximum: return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, @@ -3225,24 +3483,26 @@ bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, } } - // Bail out for constant expressions, but try to handle vector constants. - if (!V->getType()->isVectorTy() || !isa<Constant>(V)) - return false; - - // For vectors, verify that each element is not infinity. 
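// Small standalone illustration of the FMul/FDiv same-operand rule added
// above: for any double x, x * x is either NaN or >= 0, and x / x is either
// NaN or exactly 1.0 (the NaN cases being x = NaN, and for the division also
// x = 0.0 or x = infinity).
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double Samples[] = {0.0, -0.0, 1.5, -3.25, 1e-308,
                            std::numeric_limits<double>::infinity(),
                            -std::numeric_limits<double>::infinity(),
                            std::numeric_limits<double>::quiet_NaN()};
  for (double X : Samples) {
    double Sq = X * X;
    assert(std::isnan(Sq) || Sq >= 0.0);
    double Ratio = X / X;
    assert(std::isnan(Ratio) || Ratio == 1.0);
  }
  return 0;
}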
- unsigned NumElts = V->getType()->getVectorNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = cast<Constant>(V)->getAggregateElement(i); - if (!Elt) - return false; - if (isa<UndefValue>(Elt)) - continue; - auto *CElt = dyn_cast<ConstantFP>(Elt); - if (!CElt || CElt->isInfinity()) - return false; + // try to handle fixed width vector constants + if (isa<FixedVectorType>(V->getType()) && isa<Constant>(V)) { + // For vectors, verify that each element is not infinity. + unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isInfinity()) + return false; + } + // All elements were confirmed non-infinity or undefined. + return true; } - // All elements were confirmed non-infinity or undefined. - return true; + + // was not able to prove that V never contains infinity + return false; } bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, @@ -3312,6 +3572,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1); case Intrinsic::sqrt: return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) && @@ -3326,24 +3587,26 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, } } - // Bail out for constant expressions, but try to handle vector constants. - if (!V->getType()->isVectorTy() || !isa<Constant>(V)) - return false; - - // For vectors, verify that each element is not NaN. - unsigned NumElts = V->getType()->getVectorNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = cast<Constant>(V)->getAggregateElement(i); - if (!Elt) - return false; - if (isa<UndefValue>(Elt)) - continue; - auto *CElt = dyn_cast<ConstantFP>(Elt); - if (!CElt || CElt->isNaN()) - return false; + // Try to handle fixed width vector constants + if (isa<FixedVectorType>(V->getType()) && isa<Constant>(V)) { + // For vectors, verify that each element is not NaN. + unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isNaN()) + return false; + } + // All elements were confirmed not-NaN or undefined. + return true; } - // All elements were confirmed not-NaN or undefined. - return true; + + // Was not able to prove that V never contains NaN + return false; } Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { @@ -3359,8 +3622,8 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { if (isa<UndefValue>(V)) return UndefInt8; - const uint64_t Size = DL.getTypeStoreSize(V->getType()); - if (!Size) + // Return Undef for zero-sized type. 
+ if (!DL.getTypeStoreSize(V->getType()).isNonZero()) return UndefInt8; Constant *C = dyn_cast<Constant>(V); @@ -3678,7 +3941,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V, Array = nullptr; } else { const DataLayout &DL = GV->getParent()->getDataLayout(); - uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy); + uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize(); uint64_t Length = SizeInBytes / (ElementSize / 8); if (Length <= Offset) return false; @@ -3839,12 +4102,17 @@ llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( const CallBase *Call, bool MustPreserveNullness) { - return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || - Call->getIntrinsicID() == Intrinsic::strip_invariant_group || - Call->getIntrinsicID() == Intrinsic::aarch64_irg || - Call->getIntrinsicID() == Intrinsic::aarch64_tagp || - (!MustPreserveNullness && - Call->getIntrinsicID() == Intrinsic::ptrmask); + switch (Call->getIntrinsicID()) { + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::aarch64_irg: + case Intrinsic::aarch64_tagp: + return true; + case Intrinsic::ptrmask: + return !MustPreserveNullness; + default: + return false; + } } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3884,15 +4152,20 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, } else if (Operator::getOpcode(V) == Instruction::BitCast || Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast<Operator>(V)->getOperand(0); + if (!V->getType()->isPointerTy()) + return V; } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (GA->isInterposable()) return V; V = GA->getAliasee(); - } else if (isa<AllocaInst>(V)) { - // An alloca can't be further simplified. - return V; } else { - if (auto *Call = dyn_cast<CallBase>(V)) { + if (auto *PHI = dyn_cast<PHINode>(V)) { + // Look through single-arg phi nodes created by LCSSA. + if (PHI->getNumIncomingValues() == 1) { + V = PHI->getIncomingValue(0); + continue; + } + } else if (auto *Call = dyn_cast<CallBase>(V)) { // CaptureTracking can know about special capturing properties of some // intrinsics like launder.invariant.group, that can't be expressed with // the attributes, but have properties like returning aliasing pointer. @@ -3908,14 +4181,6 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, } } - // See if InstructionSimplify knows any relevant tricks. - if (Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Acquire a DominatorTree and AssumptionCache and use them. - if (Value *Simplified = SimplifyInstruction(I, {DL, I})) { - V = Simplified; - continue; - } - return V; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); @@ -4309,6 +4574,16 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + // Checking for conditions implied by dominating conditions may be expensive. + // Limit it to usub_with_overflow calls for now. 
+ if (match(CxtI, + m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) + if (auto C = + isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, CxtI, DL)) { + if (*C) + return OverflowResult::NeverOverflows; + return OverflowResult::AlwaysOverflowsLow; + } ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( @@ -4385,7 +4660,100 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } -bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V) { +bool llvm::canCreatePoison(const Instruction *I) { + // See whether I has flags that may create poison + if (isa<OverflowingBinaryOperator>(I) && + (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) + return true; + if (isa<PossiblyExactOperator>(I) && I->isExact()) + return true; + if (auto *FP = dyn_cast<FPMathOperator>(I)) { + auto FMF = FP->getFastMathFlags(); + if (FMF.noNaNs() || FMF.noInfs()) + return true; + } + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->isInBounds()) + return true; + + unsigned Opcode = I->getOpcode(); + + // Check whether opcode is a poison-generating operation + switch (Opcode) { + case Instruction::Shl: + case Instruction::AShr: + case Instruction::LShr: { + // Shifts return poison if shiftwidth is larger than the bitwidth. + if (auto *C = dyn_cast<Constant>(I->getOperand(1))) { + SmallVector<Constant *, 4> ShiftAmounts; + if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { + unsigned NumElts = FVTy->getNumElements(); + for (unsigned i = 0; i < NumElts; ++i) + ShiftAmounts.push_back(C->getAggregateElement(i)); + } else if (isa<ScalableVectorType>(C->getType())) + return true; // Can't tell, just return true to be safe + else + ShiftAmounts.push_back(C); + + bool Safe = llvm::all_of(ShiftAmounts, [](Constant *C) { + auto *CI = dyn_cast<ConstantInt>(C); + return CI && CI->getZExtValue() < C->getType()->getIntegerBitWidth(); + }); + return !Safe; + } + return true; + } + case Instruction::FPToSI: + case Instruction::FPToUI: + // fptosi/ui yields poison if the resulting value does not fit in the + // destination type. + return true; + case Instruction::Call: + case Instruction::CallBr: + case Instruction::Invoke: + // Function calls can return a poison value even if args are non-poison + // values. + return true; + case Instruction::InsertElement: + case Instruction::ExtractElement: { + // If index exceeds the length of the vector, it returns poison + auto *VTy = cast<VectorType>(I->getOperand(0)->getType()); + unsigned IdxOp = I->getOpcode() == Instruction::InsertElement ? 2 : 1; + auto *Idx = dyn_cast<ConstantInt>(I->getOperand(IdxOp)); + if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min) + return true; + return false; + } + case Instruction::FNeg: + case Instruction::PHI: + case Instruction::Select: + case Instruction::URem: + case Instruction::SRem: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + case Instruction::InsertValue: + case Instruction::Freeze: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::GetElementPtr: + return false; + default: + if (isa<CastInst>(I)) + return false; + else if (isa<BinaryOperator>(I)) + return false; + // Be conservative and return true. 
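// Standalone sketch of the constant shift-amount screening in canCreatePoison
// above: a shift can yield poison exactly when some constant amount is not
// strictly less than the bit width. Plain C++ containers stand in for the
// Constant and vector types; names are illustrative.
#include <cassert>
#include <vector>

bool shiftMayCreatePoison(const std::vector<unsigned> &Amounts,
                          unsigned BitWidth) {
  for (unsigned A : Amounts)
    if (A >= BitWidth)
      return true;  // at least one lane shifts by >= BitWidth
  return false;     // every constant amount is in range
}

int main() {
  assert(!shiftMayCreatePoison({0, 7, 31}, 32)); // all in range for i32
  assert(shiftMayCreatePoison({1, 32}, 32));     // one lane shifts by 32
  return 0;
}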
+ return true; + } +} + +bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + unsigned Depth) { + if (Depth >= MaxDepth) + return false; + // If the value is a freeze instruction, then it can never // be undef or poison. if (isa<FreezeInst>(V)) @@ -4393,10 +4761,100 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V) { // TODO: Some instructions are guaranteed to return neither undef // nor poison if their arguments are not poison/undef. - // TODO: Deal with other Constant subclasses. - if (isa<ConstantInt>(V) || isa<GlobalVariable>(V)) + if (auto *C = dyn_cast<Constant>(V)) { + // TODO: We can analyze ConstExpr by opcode to determine if there is any + // possibility of poison. + if (isa<UndefValue>(C) || isa<ConstantExpr>(C)) + return false; + + if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || + isa<ConstantPointerNull>(C) || isa<Function>(C)) + return true; + + if (C->getType()->isVectorTy()) + return !C->containsUndefElement() && !C->containsConstantExpression(); + + // TODO: Recursively analyze aggregates or other constants. + return false; + } + + // Strip cast operations from a pointer value. + // Note that stripPointerCastsSameRepresentation can strip off getelementptr + // inbounds with zero offset. To guarantee that the result isn't poison, the + // stripped pointer is checked as it has to be pointing into an allocated + // object or be null `null` to ensure `inbounds` getelement pointers with a + // zero offset could not produce poison. + // It can strip off addrspacecast that do not change bit representation as + // well. We believe that such addrspacecast is equivalent to no-op. + auto *StrippedV = V->stripPointerCastsSameRepresentation(); + if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) || + isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV)) return true; + auto OpCheck = [&](const Value *V) { + return isGuaranteedNotToBeUndefOrPoison(V, CtxI, DT, Depth + 1); + }; + + if (auto *I = dyn_cast<Instruction>(V)) { + switch (I->getOpcode()) { + case Instruction::GetElementPtr: { + auto *GEPI = dyn_cast<GetElementPtrInst>(I); + if (!GEPI->isInBounds() && llvm::all_of(GEPI->operands(), OpCheck)) + return true; + break; + } + case Instruction::FCmp: { + auto *FI = dyn_cast<FCmpInst>(I); + if (FI->getFastMathFlags().none() && + llvm::all_of(FI->operands(), OpCheck)) + return true; + break; + } + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::ICmp: + if (llvm::all_of(I->operands(), OpCheck)) + return true; + break; + default: + break; + } + + if (programUndefinedIfPoison(I) && I->getType()->isIntegerTy(1)) + // Note: once we have an agreement that poison is a value-wise concept, + // we can remove the isIntegerTy(1) constraint. + return true; + } + + // CxtI may be null or a cloned instruction. + if (!CtxI || !CtxI->getParent() || !DT) + return false; + + auto *DNode = DT->getNode(CtxI->getParent()); + if (!DNode) + // Unreachable block + return false; + + // If V is used as a branch condition before reaching CtxI, V cannot be + // undef or poison. 
+ // br V, BB1, BB2 + // BB1: + // CtxI ; V cannot be undef or poison here + auto *Dominator = DNode->getIDom(); + while (Dominator) { + auto *TI = Dominator->getBlock()->getTerminator(); + + if (auto BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional() && BI->getCondition() == V) + return true; + } else if (auto SI = dyn_cast<SwitchInst>(TI)) { + if (SI->getCondition() == V) + return true; + } + + Dominator = Dominator->getIDom(); + } + return false; } @@ -4436,14 +4894,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { return false; // Calls can throw, or contain an infinite loop, or kill the process. - if (auto CS = ImmutableCallSite(I)) { + if (const auto *CB = dyn_cast<CallBase>(I)) { // Call sites that throw have implicit non-local control flow. - if (!CS.doesNotThrow()) + if (!CB->doesNotThrow()) return false; // A function which doens't throw and has "willreturn" attribute will // always return. - if (CS.hasFnAttr(Attribute::WillReturn)) + if (CB->hasFnAttr(Attribute::WillReturn)) return true; // Non-throwing call sites can loop infinitely, call exit/pthread_exit @@ -4462,7 +4920,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. - return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); + return CB->onlyReadsMemory() || CB->onlyAccessesArgMemory(); } // Other instructions return normally. @@ -4493,41 +4951,28 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, llvm_unreachable("Instruction not contained in its own parent basic block."); } -bool llvm::propagatesFullPoison(const Instruction *I) { - // TODO: This should include all instructions apart from phis, selects and - // call-like instructions. +bool llvm::propagatesPoison(const Instruction *I) { switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: - case Instruction::Mul: - case Instruction::Shl: - case Instruction::GetElementPtr: - // These operations all propagate poison unconditionally. Note that poison - // is not any particular value, so xor or subtraction of poison with - // itself still yields poison, not zero. - return true; - - case Instruction::AShr: - case Instruction::SExt: - // For these operations, one bit of the input is replicated across - // multiple output bits. A replicated poison bit is still poison. - return true; - + case Instruction::Freeze: + case Instruction::Select: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Invoke: + return false; case Instruction::ICmp: - // Comparing poison with any value yields poison. This is why, for - // instance, x s< (x +nsw 1) can be folded to true. + case Instruction::FCmp: + case Instruction::GetElementPtr: return true; - default: + if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I)) + return true; + + // Be conservative and return false. 
return false; } } -const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { +const Value *llvm::getGuaranteedNonPoisonOp(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Store: return cast<StoreInst>(I)->getPointerOperand(); @@ -4547,23 +4992,30 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { case Instruction::SRem: return I->getOperand(1); + case Instruction::Call: + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::assume: + return II->getArgOperand(0); + default: + return nullptr; + } + } + return nullptr; + default: - // Note: It's really tempting to think that a conditional branch or - // switch should be listed here, but that's incorrect. It's not - // branching off of poison which is UB, it is executing a side effecting - // instruction which follows the branch. return nullptr; } } bool llvm::mustTriggerUB(const Instruction *I, const SmallSet<const Value *, 16>& KnownPoison) { - auto *NotPoison = getGuaranteedNonFullPoisonOp(I); + auto *NotPoison = getGuaranteedNonPoisonOp(I); return (NotPoison && KnownPoison.count(NotPoison)); } -bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { +bool llvm::programUndefinedIfPoison(const Instruction *PoisonI) { // We currently only look for uses of poison values within the same basic // block, as that makes it easier to guarantee that the uses will be // executed given that PoisonI is executed. @@ -4596,7 +5048,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { if (YieldsPoison.count(&I)) { for (const User *User : I.users()) { const Instruction *UserI = cast<Instruction>(User); - if (propagatesFullPoison(UserI)) + if (propagatesPoison(UserI)) YieldsPoison.insert(User); } } @@ -4633,6 +5085,9 @@ static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { return true; } + if (isa<ConstantAggregateZero>(V)) + return true; + return false; } @@ -4689,7 +5144,7 @@ static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, if (match(FalseVal, m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && - FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan) + *FC1 < *FC2) return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; break; case CmpInst::FCMP_OGT: @@ -4699,7 +5154,7 @@ static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, if (match(FalseVal, m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && - FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan) + *FC1 > *FC2) return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; break; default: @@ -4840,6 +5295,21 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, return {SPF_UNKNOWN, SPNB_NA, false}; } +/// If the input value is the result of a 'not' op, constant integer, or vector +/// splat of a constant integer, return the bitwise-not source value. +/// TODO: This could be extended to handle non-splat vector integer constants. +static Value *getNotValue(Value *V) { + Value *NotV; + if (match(V, m_Not(m_Value(NotV)))) + return NotV; + + const APInt *C; + if (match(V, m_APInt(C))) + return ConstantInt::get(V->getType(), ~(*C)); + + return nullptr; +} + /// Match non-obvious integer minimum and maximum sequences. 
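// Standalone check of the identity behind the new "look through 'not' ops"
// handling in matchMinMax that follows: bitwise-not reverses both the signed
// and the unsigned order, so select(x > y, ~x, ~y) is min(~x, ~y). Exhaustive
// over 8-bit signed values.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -128; Y <= 127; ++Y) {
      int8_t NX = ~static_cast<int8_t>(X), NY = ~static_cast<int8_t>(Y);
      int8_t Sel = (X > Y) ? NX : NY;
      assert(Sel == std::min(NX, NY)); // (X > Y) ? ~X : ~Y == SMIN(~X, ~Y)
    }
  return 0;
}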
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, @@ -4858,6 +5328,31 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; + // Look through 'not' ops to find disguised min/max. + // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) + // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) + if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { + switch (Pred) { + case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; + case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; + case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; + case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; + default: break; + } + } + + // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) + // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) + if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { + switch (Pred) { + case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; + case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; + case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; + case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; + default: break; + } + } + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4898,19 +5393,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; } - // Look through 'not' ops to find disguised signed min/max. - // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C) - // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) - if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && - match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; - - // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X) - // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X) - if (match(FalseVal, m_Not(m_Specific(CmpLHS))) && - match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; - return {SPF_UNKNOWN, SPNB_NA, false}; } @@ -5445,20 +5927,18 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is /// false. Otherwise, return None if we can't infer anything. static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS, - const ICmpInst *RHS, + CmpInst::Predicate BPred, + const Value *BLHS, const Value *BRHS, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { Value *ALHS = LHS->getOperand(0); Value *ARHS = LHS->getOperand(1); + // The rest of the logic assumes the LHS condition is true. If that's not the // case, invert the predicate to make it so. - ICmpInst::Predicate APred = + CmpInst::Predicate APred = LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); - Value *BLHS = RHS->getOperand(0); - Value *BRHS = RHS->getOperand(1); - ICmpInst::Predicate BPred = RHS->getPredicate(); - // Can we infer anything when the two compares have matching operands? bool AreSwappedOps; if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, AreSwappedOps)) { @@ -5489,10 +5969,11 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS, /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is /// false. Otherwise, return None if we can't infer anything. 
We expect the /// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction. -static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS, - const ICmpInst *RHS, - const DataLayout &DL, bool LHSIsTrue, - unsigned Depth) { +static Optional<bool> +isImpliedCondAndOr(const BinaryOperator *LHS, CmpInst::Predicate RHSPred, + const Value *RHSOp0, const Value *RHSOp1, + + const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // The LHS must be an 'or' or an 'and' instruction. assert((LHS->getOpcode() == Instruction::And || LHS->getOpcode() == Instruction::Or) && @@ -5507,36 +5988,33 @@ static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS, if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { // FIXME: Make this non-recursion. - if (Optional<bool> Implication = - isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1)) + if (Optional<bool> Implication = isImpliedCondition( + ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) return Implication; - if (Optional<bool> Implication = - isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1)) + if (Optional<bool> Implication = isImpliedCondition( + ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) return Implication; return None; } return None; } -Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, - const DataLayout &DL, bool LHSIsTrue, - unsigned Depth) { +Optional<bool> +llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, + const Value *RHSOp0, const Value *RHSOp1, + const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // Bail out when we hit the limit. if (Depth == MaxDepth) return None; // A mismatch occurs when we compare a scalar cmp to a vector cmp, for // example. - if (LHS->getType() != RHS->getType()) + if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) return None; Type *OpTy = LHS->getType(); assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!"); - // LHS ==> RHS by definition - if (LHS == RHS) - return LHSIsTrue; - // FIXME: Extending the code below to handle vectors. if (OpTy->isVectorTy()) return None; @@ -5545,51 +6023,87 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, // Both LHS and RHS are icmps. const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); - const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS); - if (LHSCmp && RHSCmp) - return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth); + if (LHSCmp) + return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, + Depth); - // The LHS should be an 'or' or an 'and' instruction. We expect the RHS to be - // an icmp. FIXME: Add support for and/or on the RHS. + /// The LHS should be an 'or' or an 'and' instruction. We expect the RHS to + /// be / an icmp. FIXME: Add support for and/or on the RHS. 
const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS); - if (LHSBO && RHSCmp) { + if (LHSBO) { if ((LHSBO->getOpcode() == Instruction::And || LHSBO->getOpcode() == Instruction::Or)) - return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth); + return isImpliedCondAndOr(LHSBO, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, + Depth); } return None; } -Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, - const Instruction *ContextI, - const DataLayout &DL) { - assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); +Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, + const DataLayout &DL, bool LHSIsTrue, + unsigned Depth) { + // LHS ==> RHS by definition + if (LHS == RHS) + return LHSIsTrue; + + const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS); + if (RHSCmp) + return isImpliedCondition(LHS, RHSCmp->getPredicate(), + RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, + LHSIsTrue, Depth); + return None; +} + +// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch +// condition dominating ContextI or nullptr, if no condition is found. +static std::pair<Value *, bool> +getDomPredecessorCondition(const Instruction *ContextI) { if (!ContextI || !ContextI->getParent()) - return None; + return {nullptr, false}; // TODO: This is a poor/cheap way to determine dominance. Should we use a // dominator tree (eg, from a SimplifyQuery) instead? const BasicBlock *ContextBB = ContextI->getParent(); const BasicBlock *PredBB = ContextBB->getSinglePredecessor(); if (!PredBB) - return None; + return {nullptr, false}; // We need a conditional branch in the predecessor. Value *PredCond; BasicBlock *TrueBB, *FalseBB; if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB))) - return None; + return {nullptr, false}; // The branch should get simplified. Don't bother simplifying this condition. if (TrueBB == FalseBB) - return None; + return {nullptr, false}; assert((TrueBB == ContextBB || FalseBB == ContextBB) && "Predecessor block does not point to successor?"); // Is this condition implied by the predecessor condition? 
- bool CondIsTrue = TrueBB == ContextBB; - return isImpliedCondition(PredCond, Cond, DL, CondIsTrue); + return {PredCond, TrueBB == ContextBB}; +} + +Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, + const Instruction *ContextI, + const DataLayout &DL) { + assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); + auto PredCond = getDomPredecessorCondition(ContextI); + if (PredCond.first) + return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); + return None; +} + +Optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, + const Value *LHS, const Value *RHS, + const Instruction *ContextI, + const DataLayout &DL) { + auto PredCond = getDomPredecessorCondition(ContextI); + if (PredCond.first) + return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, + PredCond.second); + return None; } static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, @@ -5861,9 +6375,15 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, } } -ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { +ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo, + AssumptionCache *AC, + const Instruction *CtxI, + unsigned Depth) { assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); + if (Depth == MaxDepth) + return ConstantRange::getFull(V->getType()->getScalarSizeInBits()); + const APInt *C; if (match(V, m_APInt(C))) return ConstantRange(*C); @@ -5885,6 +6405,31 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range)) CR = CR.intersectWith(getConstantRangeFromMetadata(*Range)); + if (CtxI && AC) { + // Try to restrict the range based on information from assumptions. + for (auto &AssumeVH : AC->assumptionsFor(V)) { + if (!AssumeVH) + continue; + CallInst *I = cast<CallInst>(AssumeVH); + assert(I->getParent()->getParent() == CtxI->getParent()->getParent() && + "Got assumption for the wrong function!"); + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && + "must be an assume intrinsic"); + + if (!isValidAssumeForContext(I, CtxI, nullptr)) + continue; + Value *Arg = I->getArgOperand(0); + ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); + // Currently we just use information from comparisons. + if (!Cmp || Cmp->getOperand(0) != V) + continue; + ConstantRange RHS = computeConstantRange(Cmp->getOperand(1), UseInstrInfo, + AC, I, Depth + 1); + CR = CR.intersectWith( + ConstantRange::makeSatisfyingICmpRegion(Cmp->getPredicate(), RHS)); + } + } + return CR; } @@ -5910,10 +6455,12 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { continue; } - // Otherwise, we have a sequential type like an array or vector. Multiply - // the index by the ElementSize. - uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size * OpC->getSExtValue(); + // Otherwise, we have a sequential type like an array or fixed-length + // vector. Multiply the index by the ElementSize. 
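// Minimal standalone model of what the new assumption handling in
// computeConstantRange above does: intersect the value's current range with
// the region that satisfies an assumed comparison. Plain non-wrapping
// half-open intervals stand in for ConstantRange; names are illustrative.
#include <algorithm>
#include <cassert>
#include <cstdint>

struct Range {
  uint64_t Lo, Hi; // half-open [Lo, Hi), assumed non-wrapping here
};

Range intersect(Range A, Range B) {
  uint64_t Lo = std::max(A.Lo, B.Lo), Hi = std::min(A.Hi, B.Hi);
  return {Lo, std::max(Lo, Hi)};
}

int main() {
  Range CR{0, 1000};       // what instruction analysis already established
  Range Assumed{0, 100};   // region satisfying assume(x ult 100)
  Range Refined = intersect(CR, Assumed);
  assert(Refined.Lo == 0 && Refined.Hi == 100);
  return 0;
}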
+ TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType()); + if (Size.isScalable()) + return None; + Offset += Size.getFixedSize() * OpC->getSExtValue(); } return Offset; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index c45ab941a1428..23531b65ea32d 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -78,6 +78,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::pow: case Intrinsic::fma: case Intrinsic::fmuladd: @@ -112,7 +113,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, /// its ID, in case it does not found it return not_intrinsic. Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI) { - Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI); + Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI); if (ID == Intrinsic::not_intrinsic) return Intrinsic::not_intrinsic; @@ -262,9 +263,12 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { Value *llvm::findScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); VectorType *VTy = cast<VectorType>(V->getType()); - unsigned Width = VTy->getNumElements(); - if (EltNo >= Width) // Out of range access. - return UndefValue::get(VTy->getElementType()); + // For fixed-length vector, return undef for out of range access. + if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { + unsigned Width = FVTy->getNumElements(); + if (EltNo >= Width) + return UndefValue::get(FVTy->getElementType()); + } if (Constant *C = dyn_cast<Constant>(V)) return C->getAggregateElement(EltNo); @@ -285,8 +289,11 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { return findScalarElement(III->getOperand(0), EltNo); } - if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { - unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); + ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); + // Restrict the following transformation to fixed-length vector. + if (SVI && isa<FixedVectorType>(SVI->getType())) { + unsigned LHSWidth = + cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(VTy->getElementType()); @@ -307,6 +314,24 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { return nullptr; } +int llvm::getSplatIndex(ArrayRef<int> Mask) { + int SplatIndex = -1; + for (int M : Mask) { + // Ignore invalid (undefined) mask elements. + if (M < 0) + continue; + + // There can be only 1 non-negative mask element value if this is a splat. + if (SplatIndex != -1 && SplatIndex != M) + return -1; + + // Initialize the splat index to the 1st non-negative mask element. + SplatIndex = M; + } + assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?"); + return SplatIndex; +} + /// Get splat value if the input is a splat vector or return nullptr. /// This function is not fully general. 
It checks only 2 cases: /// the input value is (1) a splat constant vector or (2) a sequence @@ -318,9 +343,9 @@ const llvm::Value *llvm::getSplatValue(const Value *V) { // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...> Value *Splat; - if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat), - m_ZeroInt()), - m_Value(), m_ZeroInt()))) + if (match(V, + m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()), + m_Value(), m_ZeroMask()))) return Splat; return nullptr; @@ -330,21 +355,32 @@ const llvm::Value *llvm::getSplatValue(const Value *V) { // adjusted if needed. const unsigned MaxDepth = 6; -bool llvm::isSplatValue(const Value *V, unsigned Depth) { +bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { assert(Depth <= MaxDepth && "Limit Search Depth"); if (isa<VectorType>(V->getType())) { if (isa<UndefValue>(V)) return true; - // FIXME: Constant splat analysis does not allow undef elements. + // FIXME: We can allow undefs, but if Index was specified, we may want to + // check that the constant is defined at that index. if (auto *C = dyn_cast<Constant>(V)) return C->getSplatValue() != nullptr; } - // FIXME: Constant splat analysis does not allow undef elements. - Constant *Mask; - if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask)))) - return Mask->getSplatValue() != nullptr; + if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) { + // FIXME: We can safely allow undefs here. If Index was specified, we will + // check that the mask elt is defined at the required index. + if (!is_splat(Shuf->getShuffleMask())) + return false; + + // Match any index. + if (Index == -1) + return true; + + // Match a specific element. The mask should be defined at and match the + // specified index. + return Shuf->getMaskValue(Index) == Index; + } // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ == MaxDepth) @@ -353,18 +389,91 @@ bool llvm::isSplatValue(const Value *V, unsigned Depth) { // If both operands of a binop are splats, the result is a splat. Value *X, *Y, *Z; if (match(V, m_BinOp(m_Value(X), m_Value(Y)))) - return isSplatValue(X, Depth) && isSplatValue(Y, Depth); + return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth); // If all operands of a select are splats, the result is a splat. if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z)))) - return isSplatValue(X, Depth) && isSplatValue(Y, Depth) && - isSplatValue(Z, Depth); + return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) && + isSplatValue(Z, Index, Depth); // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops). return false; } +void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, + SmallVectorImpl<int> &ScaledMask) { + assert(Scale > 0 && "Unexpected scaling factor"); + + // Fast-path: if no scaling, then it is just a copy. + if (Scale == 1) { + ScaledMask.assign(Mask.begin(), Mask.end()); + return; + } + + ScaledMask.clear(); + for (int MaskElt : Mask) { + if (MaskElt >= 0) { + assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= + std::numeric_limits<int32_t>::max() && + "Overflowed 32-bits"); + } + for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) + ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt); + } +} + +bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, + SmallVectorImpl<int> &ScaledMask) { + assert(Scale > 0 && "Unexpected scaling factor"); + + // Fast-path: if no scaling, then it is just a copy. 
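The splat handling added above (getSplatIndex and the index-aware isSplatValue) boils down to one scan over the shuffle mask: every defined element must name the same source lane. A standalone mirror of that scan, in plain C++ for illustration:

#include <vector>

// Standalone version of the splat-index scan: all defined (non-negative)
// mask elements must agree on a single source lane, otherwise -1.
int getSplatIndexSketch(const std::vector<int> &Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    if (M < 0)
      continue;                          // undef lane, ignore
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;                         // two different lanes -> not a splat
    SplatIndex = M;
  }
  return SplatIndex;
}

// getSplatIndexSketch({3, -1, 3, 3}) == 3
// getSplatIndexSketch({0, 1, 0, 0})  == -1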
+ if (Scale == 1) { + ScaledMask.assign(Mask.begin(), Mask.end()); + return true; + } + + // We must map the original elements down evenly to a type with less elements. + int NumElts = Mask.size(); + if (NumElts % Scale != 0) + return false; + + ScaledMask.clear(); + ScaledMask.reserve(NumElts / Scale); + + // Step through the input mask by splitting into Scale-sized slices. + do { + ArrayRef<int> MaskSlice = Mask.take_front(Scale); + assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice."); + + // The first element of the slice determines how we evaluate this slice. + int SliceFront = MaskSlice.front(); + if (SliceFront < 0) { + // Negative values (undef or other "sentinel" values) must be equal across + // the entire slice. + if (!is_splat(MaskSlice)) + return false; + ScaledMask.push_back(SliceFront); + } else { + // A positive mask element must be cleanly divisible. + if (SliceFront % Scale != 0) + return false; + // Elements of the slice must be consecutive. + for (int i = 1; i < Scale; ++i) + if (MaskSlice[i] != SliceFront + i) + return false; + ScaledMask.push_back(SliceFront / Scale); + } + Mask = Mask.drop_front(Scale); + } while (!Mask.empty()); + + assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask"); + + // All elements of the original mask can be scaled down to map to the elements + // of a mask with wider elements. + return true; +} + MapVector<Instruction *, uint64_t> llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI) { @@ -636,7 +745,7 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { } Constant * -llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, +llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup<Instruction> &Group) { // All 1's means mask is not needed. 
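A worked example of the two mask-scaling helpers above may help. narrowShuffleMaskElts replaces each mask element M with Scale consecutive elements, which is what a shuffle mask needs when the vector elements are split into narrower ones; widenShuffleMaskElts is the partial inverse and fails when a slice is not consecutive or not aligned. A small standalone sketch (standard C++ only, not the LLVM API):

#include <cassert>
#include <vector>

// Standalone illustration of the narrowing direction: each mask element M
// becomes Scale consecutive elements Scale*M .. Scale*M+Scale-1 (undef stays undef).
std::vector<int> narrowMaskSketch(int Scale, const std::vector<int> &Mask) {
  std::vector<int> Out;
  for (int M : Mask)
    for (int S = 0; S != Scale; ++S)
      Out.push_back(M < 0 ? M : Scale * M + S);
  return Out;
}

int main() {
  // Splitting each element in two (e.g. viewing <2 x i64> operands as <4 x i32>):
  // the mask <0, 2> becomes <0, 1, 4, 5>.
  assert((narrowMaskSketch(2, {0, 2}) == std::vector<int>{0, 1, 4, 5}));
  // The widening direction only succeeds when each Scale-sized slice is
  // consecutive and starts at a multiple of Scale; <0, 2, 4, 5> cannot be widened.
  return 0;
}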
if (Group.getNumMembers() == Group.getFactor()) @@ -655,52 +764,52 @@ llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, return ConstantVector::get(Mask); } -Constant *llvm::createReplicatedMask(IRBuilder<> &Builder, - unsigned ReplicationFactor, unsigned VF) { - SmallVector<Constant *, 16> MaskVec; +llvm::SmallVector<int, 16> +llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) { + SmallVector<int, 16> MaskVec; for (unsigned i = 0; i < VF; i++) for (unsigned j = 0; j < ReplicationFactor; j++) - MaskVec.push_back(Builder.getInt32(i)); + MaskVec.push_back(i); - return ConstantVector::get(MaskVec); + return MaskVec; } -Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF, - unsigned NumVecs) { - SmallVector<Constant *, 16> Mask; +llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF, + unsigned NumVecs) { + SmallVector<int, 16> Mask; for (unsigned i = 0; i < VF; i++) for (unsigned j = 0; j < NumVecs; j++) - Mask.push_back(Builder.getInt32(j * VF + i)); + Mask.push_back(j * VF + i); - return ConstantVector::get(Mask); + return Mask; } -Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start, - unsigned Stride, unsigned VF) { - SmallVector<Constant *, 16> Mask; +llvm::SmallVector<int, 16> +llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) { + SmallVector<int, 16> Mask; for (unsigned i = 0; i < VF; i++) - Mask.push_back(Builder.getInt32(Start + i * Stride)); + Mask.push_back(Start + i * Stride); - return ConstantVector::get(Mask); + return Mask; } -Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumInts, unsigned NumUndefs) { - SmallVector<Constant *, 16> Mask; +llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start, + unsigned NumInts, + unsigned NumUndefs) { + SmallVector<int, 16> Mask; for (unsigned i = 0; i < NumInts; i++) - Mask.push_back(Builder.getInt32(Start + i)); + Mask.push_back(Start + i); - Constant *Undef = UndefValue::get(Builder.getInt32Ty()); for (unsigned i = 0; i < NumUndefs; i++) - Mask.push_back(Undef); + Mask.push_back(-1); - return ConstantVector::get(Mask); + return Mask; } /// A helper function for concatenating vectors. This function concatenates two /// vectors having the same element type. If the second vector has fewer /// elements than the first, it is padded with undefs. -static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, +static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2) { VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); @@ -714,16 +823,17 @@ static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, if (NumElts1 > NumElts2) { // Extend with UNDEFs. 
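For reference, the mask helpers in this hunk now produce plain index vectors, with -1 standing for an undef lane; the padding mask used right here in concatenateTwoVectors is just createSequentialMask with trailing undefs. A standalone sketch with a few hand-computed values (illustrative only, not the LLVM functions themselves):

#include <cassert>
#include <vector>

// Standalone mirror of the integer createSequentialMask: Start..Start+NumInts-1
// followed by NumUndefs undef (-1) lanes.
std::vector<int> sequentialMaskSketch(unsigned Start, unsigned NumInts, unsigned NumUndefs) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < NumInts; ++i)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; ++i)
    Mask.push_back(-1);
  return Mask;
}

int main() {
  // The shape of the mask used to pad the shorter shuffle operand with undefs:
  assert((sequentialMaskSketch(0, 2, 2) == std::vector<int>{0, 1, -1, -1}));
  // Other helpers in this hunk, computed by hand from their loops:
  //   createReplicatedMask(3, 2)  -> {0, 0, 0, 1, 1, 1}
  //   createInterleaveMask(4, 2)  -> {0, 4, 1, 5, 2, 6, 3, 7}
  //   createStrideMask(0, 2, 4)   -> {0, 2, 4, 6}
  return 0;
}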
- Constant *ExtMask = - createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2); - V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + V2 = Builder.CreateShuffleVector( + V2, UndefValue::get(VecTy2), + createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); } - Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0); - return Builder.CreateShuffleVector(V1, V2, Mask); + return Builder.CreateShuffleVector( + V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0)); } -Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) { +Value *llvm::concatenateVectors(IRBuilderBase &Builder, + ArrayRef<Value *> Vecs) { unsigned NumVecs = Vecs.size(); assert(NumVecs > 1 && "Should be at least two vectors"); @@ -756,8 +866,9 @@ bool llvm::maskIsAllZeroOrUndef(Value *Mask) { return false; if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) return true; - for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; - ++I) { + for (unsigned I = 0, + E = cast<VectorType>(ConstMask->getType())->getNumElements(); + I != E; ++I) { if (auto *MaskElt = ConstMask->getAggregateElement(I)) if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) continue; @@ -773,8 +884,9 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) { return false; if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) return true; - for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; - ++I) { + for (unsigned I = 0, + E = cast<VectorType>(ConstMask->getType())->getNumElements(); + I != E; ++I) { if (auto *MaskElt = ConstMask->getAggregateElement(I)) if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) continue; @@ -835,13 +947,8 @@ void InterleavedAccessInfo::collectConstStrideAccesses( const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = cast<PointerType>(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); - - // An alignment of 0 means target ABI alignment. - MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); - if (!Alignment) - Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, + getLoadStoreAlignment(&I)); } } @@ -922,7 +1029,7 @@ void InterleavedAccessInfo::analyzeInterleaving( // create a group for B, we continue with the bottom-up algorithm to ensure // we don't break any of B's dependences. InterleaveGroup<Instruction> *Group = nullptr; - if (isStrided(DesB.Stride) && + if (isStrided(DesB.Stride) && (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) { Group = getInterleaveGroup(B); if (!Group) { @@ -1023,8 +1130,8 @@ void InterleavedAccessInfo::analyzeInterleaving( // All members of a predicated interleave-group must have the same predicate, // and currently must reside in the same BB. - BasicBlock *BlockA = A->getParent(); - BasicBlock *BlockB = B->getParent(); + BasicBlock *BlockA = A->getParent(); + BasicBlock *BlockB = B->getParent(); if ((isPredicated(BlockA) || isPredicated(BlockB)) && (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB)) continue; @@ -1127,22 +1234,23 @@ void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() { if (!requiresScalarEpilogue()) return; - // Avoid releasing a Group twice. 
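Jumping ahead to the VFABI helper added near the end of this VectorUtils.cpp diff: mangleTLIVectorName builds names of the form _ZGV<isa>N<VF><params>_<scalar>(<vector>). A standalone sketch of the same string construction, assuming the VFABI::_LLVM_ ISA token expands to the literal string "_LLVM_" and using the hypothetical names sin / vec_sin:

#include <iostream>
#include <sstream>
#include <string>

// Rough standalone mirror of the mangling scheme; "_LLVM_" is assumed to be
// the internal ISA token, and every parameter is emitted as a vector ("v").
std::string mangleTLIVectorNameSketch(const std::string &VectorName,
                                      const std::string &ScalarName,
                                      unsigned NumArgs, unsigned VF) {
  std::ostringstream Out;
  Out << "_ZGV" << "_LLVM_" << "N" << VF;
  for (unsigned I = 0; I < NumArgs; ++I)
    Out << "v";
  Out << "_" << ScalarName << "(" << VectorName << ")";
  return Out.str();
}

int main() {
  // e.g. a hypothetical 4-wide vector version of sin with one argument:
  std::cout << mangleTLIVectorNameSketch("vec_sin", "sin", 1, 4) << "\n";
  // prints: _ZGV_LLVM_N4v_sin(vec_sin)
}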
- SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet; - for (auto &I : InterleaveGroupMap) { - InterleaveGroup<Instruction> *Group = I.second; - if (Group->requiresScalarEpilogue()) - DelSet.insert(Group); - } - for (auto *Ptr : DelSet) { + bool ReleasedGroup = false; + // Release groups requiring scalar epilogues. Note that this also removes them + // from InterleaveGroups. + for (auto *Group : make_early_inc_range(InterleaveGroups)) { + if (!Group->requiresScalarEpilogue()) + continue; LLVM_DEBUG( dbgs() << "LV: Invalidate candidate interleaved group due to gaps that " "require a scalar epilogue (not allowed under optsize) and cannot " "be masked (not enabled). \n"); - releaseGroup(Ptr); + releaseGroup(Group); + ReleasedGroup = true; } - + assert(ReleasedGroup && "At least one group must be invalidated, as a " + "scalar epilogue was required"); + (void)ReleasedGroup; RequiresScalarEpilogue = false; } @@ -1161,6 +1269,18 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const { } } +std::string VFABI::mangleTLIVectorName(StringRef VectorName, + StringRef ScalarName, unsigned numArgs, + unsigned VF) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << "_ZGV" << VFABI::_LLVM_ << "N" << VF; + for (unsigned I = 0; I < numArgs; ++I) + Out << "v"; + Out << "_" << ScalarName << "(" << VectorName << ")"; + return std::string(Out.str()); +} + void VFABI::getVectorVariantNames( const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) { const StringRef S = @@ -1174,12 +1294,13 @@ void VFABI::getVectorVariantNames( for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { #ifndef NDEBUG - Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S); + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); + Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); assert(Info.hasValue() && "Invalid name for a VFABI variant."); assert(CI.getModule()->getFunction(Info.getValue().VectorName) && "Vector function is missing."); #endif - VariantMappings.push_back(S); + VariantMappings.push_back(std::string(S)); } } diff --git a/llvm/lib/Analysis/models/inliner/saved_model.pbtxt b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt new file mode 100644 index 0000000000000..ec522a8b7c353 --- /dev/null +++ b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt @@ -0,0 +1,32634 @@ +saved_model_schema_version: 1 +meta_graphs { + meta_info_def { + stripped_op_list { + op { + name: "Const" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "value" + type: "tensor" + } + attr { + name: "dtype" + type: "type" + } + } + op { + name: "NoOp" + } + op { + name: "PartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + } + op { + name: "Placeholder" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + default_value { + shape { + unknown_rank: true + } + } + } + } + op { + name: "ReadVariableOp" + input_arg { + name: "resource" + 
type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true + } + op { + name: "StatefulPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + is_stateful: true + } + op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "allowed_devices" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true + } + } + tags: "serve" + tensorflow_version: "1.15.0" + tensorflow_git_version: "unknown" + stripped_default_attrs: true + } + graph_def { + node { + name: "train_step" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "train_step" + } + } + } + node { + name: "train_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "train_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } 
+ } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/dense_2/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/kernel" + } + } + } + node { + name: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/dense_2/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/bias" + } + } + } + node { + name: "QNetwork/dense_2/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "NoOp" + op: "NoOp" + } + node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: 
"\nu\n\023\010\001\022\017_time_step_spec\n\024\010\002\022\020_trajectory_spec\n\023\010\003\022\017_wrapped_policy\n\016\010\004\022\ntrain_step\n\023\010\005\022\017model_variables\n\016\010\006\022\nsignatures\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0013\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\n;\n\016\010\010\022\n_q_network\n\023\010\001\022\017_time_step_spec\n\024\010\t\022\020_trajectory_spec\nE\022C\n\016VARIABLE_VALUE\022\ntrain_step\032%train_step/.ATTRIBUTES/VARIABLE_VALUE\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n\000\n\214\001\n\026\010\020\022\022_input_tensor_spec\n\014\010\021\022\010_encoder\n\022\010\022\022\016_q_value_layer\n\r\010\023\022\tvariables\n\031\010\024\022\025regularization_losses\n\027\010\025\022\023trainable_variables\n\r\010\026\022\tkeras_api\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\ng\022e\n\016VARIABLE_VALUE\022%QNetwork/EncodingNetwork/dense/kernel\032,model_variables/0/.ATTRIBUTES/VARIABLE_VALUE\ne\022c\n\016VARIABLE_VALUE\022#QNetwork/EncodingNetwork/dense/bias\032,model_variables/1/.ATTRIBUTES/VARIABLE_VALUE\ni\022g\n\016VARIABLE_VALUE\022\'QNetwork/EncodingNetwork/dense_1/kernel\032,model_variables/2/.ATTRIBUTES/VARIABLE_VALUE\ng\022e\n\016VARIABLE_VALUE\022%QNetwork/EncodingNetwork/dense_1/bias\032,model_variables/3/.ATTRIBUTES/VARIABLE_VALUE\nY\022W\n\016VARIABLE_VALUE\022\027QNetwork/dense_2/kernel\032,model_variables/4/.ATTRIBUTES/VARIABLE_VALUE\nW\022U\n\016VARIABLE_VALUE\022\025QNetwork/dense_2/bias\032,model_variables/5/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\334\001\n\026\010\027\022\022_input_tensor_spec\n\027\010\030\022\023_preprocessing_nest\n\036\010\031\022\032_flat_preprocessing_layers\n\033\010\032\022\027_preprocessing_combiner\n\032\010\033\022\026_postprocessing_layers\n\r\010\034\022\tvariables\n\031\010\035\022\025regularization_losses\n\027\010\036\022\023trainable_variables\n\r\010\037\022\tkeras_api\nh\n\n\010\016\022\006kernel\n\010\010\017\022\004bias\n\r\010 
\022\tvariables\n\031\010!\022\025regularization_losses\n\027\010\"\022\023trainable_variables\n\r\010#\022\tkeras_api\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\255\001\n\021\010$\022\rlayer_metrics\n\r\010\023\022\tvariables\n\037\010%\022\033layer_regularization_losses\n\013\010&\022\007metrics\n\n\010\'\022\006layers\n\031\010\024\022\025regularization_losses\n\033\010(\022\027non_trainable_variables\n\027\010\025\022\023trainable_variables\n\000\n\000\nV\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\nR\n\r\0105\022\tvariables\n\031\0106\022\025regularization_losses\n\027\0107\022\023trainable_variables\n\r\0108\022\tkeras_api\n\025\n\005\0109\022\0010\n\005\010:\022\0011\n\005\010;\022\0012\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\000\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\255\001\n\021\010<\022\rlayer_metrics\n\r\010\034\022\tvariables\n\037\010=\022\033layer_regularization_losses\n\013\010>\022\007metrics\n\n\010?\022\006layers\n\031\010\035\022\025regularization_losses\n\033\010@\022\027non_trainable_variables\n\027\010\036\022\023trainable_variables\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\000\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\255\001\n\021\010A\022\rlayer_metrics\n\r\010 
\022\tvariables\n\037\010B\022\033layer_regularization_losses\n\013\010C\022\007metrics\n\n\010D\022\006layers\n\031\010!\022\025regularization_losses\n\033\010E\022\027non_trainable_variables\n\027\010\"\022\023trainable_variables\n\000\n\000\n\000\n\016\n\005\010\021\022\0010\n\005\010\022\022\0011\n\000\nR\n\r\010F\022\tvariables\n\031\010G\022\025regularization_losses\n\027\010H\022\023trainable_variables\n\r\010I\022\tkeras_api\nR\n\r\010J\022\tvariables\n\031\010K\022\025regularization_losses\n\027\010L\022\023trainable_variables\n\r\010M\022\tkeras_api\nR\n\r\010N\022\tvariables\n\031\010O\022\025regularization_losses\n\027\010P\022\023trainable_variables\n\r\010Q\022\tkeras_api\nR\n\r\010R\022\tvariables\n\031\010S\022\025regularization_losses\n\027\010T\022\023trainable_variables\n\r\010U\022\tkeras_api\nR\n\r\010V\022\tvariables\n\031\010W\022\025regularization_losses\n\027\010X\022\023trainable_variables\n\r\010Y\022\tkeras_api\nR\n\r\010Z\022\tvariables\n\031\010[\022\025regularization_losses\n\027\010\\\022\023trainable_variables\n\r\010]\022\tkeras_api\nR\n\r\010^\022\tvariables\n\031\010_\022\025regularization_losses\n\027\010`\022\023trainable_variables\n\r\010a\022\tkeras_api\nR\n\r\010b\022\tvariables\n\031\010c\022\025regularization_losses\n\027\010d\022\023trainable_variables\n\r\010e\022\tkeras_api\nR\n\r\010f\022\tvariables\n\031\010g\022\025regularization_losses\n\027\010h\022\023trainable_variables\n\r\010i\022\tkeras_api\nR\n\r\010j\022\tvariables\n\031\010k\022\025regularization_losses\n\027\010l\022\023trainable_variables\n\r\010m\022\tkeras_api\nR\n\r\010n\022\tvariables\n\031\010o\022\025regularization_losses\n\027\010p\022\023trainable_variables\n\r\010q\022\tkeras_api\nR\n\r\010r\022\tvariables\n\031\010s\022\025regularization_losses\n\027\010t\022\023trainable_variables\n\r\010u\022\tkeras_api\n\000\n\000\n\000\n\255\001\n\021\010v\022\rlayer_metrics\n\r\0105\022\tvariables\n\037\010w\022\033layer_regularization_losses\n\013\010x\022\007metrics\n\n\010y\022\006layers\n\031\0106\022\025regularization_losses\n\033\010z\022\027non_trainable_variables\n\027\0107\022\023trainable_variables\nR\n\r\010{\022\tvariables\n\031\010|\022\025regularization_losses\n\027\010}\022\023trainable_variables\n\r\010~\022\tkeras_api\nk\n\n\010\n\022\006kernel\n\010\010\013\022\004bias\n\r\010\177\022\tvariables\n\032\010\200\001\022\025regularization_losses\n\030\010\201\001\022\023trainable_variables\n\016\010\202\001\022\tkeras_api\nl\n\n\010\014\022\006kernel\n\010\010\r\022\004bias\n\016\010\203\001\022\tvariables\n\032\010\204\001\022\025regularization_losses\n\030\010\205\001\022\023trainable_variables\n\016\010\206\001\022\tkeras_api\n\000\n\000\n\000\nv\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\n\006\010\032\022\00212\n\006\0109\022\00213\n\006\010:\022\00214\n\006\010;\022\00215\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\207\001\022\rlayer_metrics\n\r\010F\022\tvariables\n \010\210\001\022\033layer_regularization_losses\n\014\010\211\001\022\007metrics\n\013\010\212\001\022\006layers\n\031\010G\022\025regularization_losses\n\034\010\213\001\022\027non_trainable_variables\n\027\010H\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\214\001\022\rlayer_metrics\n\r\010J\022\tvariables\n 
\010\215\001\022\033layer_regularization_losses\n\014\010\216\001\022\007metrics\n\013\010\217\001\022\006layers\n\031\010K\022\025regularization_losses\n\034\010\220\001\022\027non_trainable_variables\n\027\010L\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\221\001\022\rlayer_metrics\n\r\010N\022\tvariables\n \010\222\001\022\033layer_regularization_losses\n\014\010\223\001\022\007metrics\n\013\010\224\001\022\006layers\n\031\010O\022\025regularization_losses\n\034\010\225\001\022\027non_trainable_variables\n\027\010P\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\226\001\022\rlayer_metrics\n\r\010R\022\tvariables\n \010\227\001\022\033layer_regularization_losses\n\014\010\230\001\022\007metrics\n\013\010\231\001\022\006layers\n\031\010S\022\025regularization_losses\n\034\010\232\001\022\027non_trainable_variables\n\027\010T\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\233\001\022\rlayer_metrics\n\r\010V\022\tvariables\n \010\234\001\022\033layer_regularization_losses\n\014\010\235\001\022\007metrics\n\013\010\236\001\022\006layers\n\031\010W\022\025regularization_losses\n\034\010\237\001\022\027non_trainable_variables\n\027\010X\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\240\001\022\rlayer_metrics\n\r\010Z\022\tvariables\n \010\241\001\022\033layer_regularization_losses\n\014\010\242\001\022\007metrics\n\013\010\243\001\022\006layers\n\031\010[\022\025regularization_losses\n\034\010\244\001\022\027non_trainable_variables\n\027\010\\\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\245\001\022\rlayer_metrics\n\r\010^\022\tvariables\n \010\246\001\022\033layer_regularization_losses\n\014\010\247\001\022\007metrics\n\013\010\250\001\022\006layers\n\031\010_\022\025regularization_losses\n\034\010\251\001\022\027non_trainable_variables\n\027\010`\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\252\001\022\rlayer_metrics\n\r\010b\022\tvariables\n \010\253\001\022\033layer_regularization_losses\n\014\010\254\001\022\007metrics\n\013\010\255\001\022\006layers\n\031\010c\022\025regularization_losses\n\034\010\256\001\022\027non_trainable_variables\n\027\010d\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\257\001\022\rlayer_metrics\n\r\010f\022\tvariables\n \010\260\001\022\033layer_regularization_losses\n\014\010\261\001\022\007metrics\n\013\010\262\001\022\006layers\n\031\010g\022\025regularization_losses\n\034\010\263\001\022\027non_trainable_variables\n\027\010h\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\264\001\022\rlayer_metrics\n\r\010j\022\tvariables\n \010\265\001\022\033layer_regularization_losses\n\014\010\266\001\022\007metrics\n\013\010\267\001\022\006layers\n\031\010k\022\025regularization_losses\n\034\010\270\001\022\027non_trainable_variables\n\027\010l\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\271\001\022\rlayer_metrics\n\r\010n\022\tvariables\n \010\272\001\022\033layer_regularization_losses\n\014\010\273\001\022\007metrics\n\013\010\274\001\022\006layers\n\031\010o\022\025regularization_losses\n\034\010\275\001\022\027non_trainable_variables\n\027\010p\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\276\001\022\rlayer_metrics\n\r\010r\022\tvariables\n 
\010\277\001\022\033layer_regularization_losses\n\014\010\300\001\022\007metrics\n\013\010\301\001\022\006layers\n\031\010s\022\025regularization_losses\n\034\010\302\001\022\027non_trainable_variables\n\027\010t\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\303\001\022\rlayer_metrics\n\r\010{\022\tvariables\n \010\304\001\022\033layer_regularization_losses\n\014\010\305\001\022\007metrics\n\013\010\306\001\022\006layers\n\031\010|\022\025regularization_losses\n\034\010\307\001\022\027non_trainable_variables\n\027\010}\022\023trainable_variables\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\000\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\264\001\n\022\010\310\001\022\rlayer_metrics\n\r\010\177\022\tvariables\n \010\311\001\022\033layer_regularization_losses\n\014\010\312\001\022\007metrics\n\013\010\313\001\022\006layers\n\032\010\200\001\022\025regularization_losses\n\034\010\314\001\022\027non_trainable_variables\n\030\010\201\001\022\023trainable_variables\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\000\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\265\001\n\022\010\315\001\022\rlayer_metrics\n\016\010\203\001\022\tvariables\n \010\316\001\022\033layer_regularization_losses\n\014\010\317\001\022\007metrics\n\013\010\320\001\022\006layers\n\032\010\204\001\022\025regularization_losses\n\034\010\321\001\022\027non_trainable_variables\n\030\010\205\001\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000" + } + } + } + } + node { + name: "action_callee_basic_block_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_basic_block_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: 
"action_caller_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callsite_height" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_cost_estimate" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_discount" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_edge_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_inlining_default" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_node_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_nr_ctant_params" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_reward" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_step_type" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "action_callee_basic_block_count" + input: "action_callee_conditionally_executed_blocks" + input: "action_callee_users" + input: "action_caller_basic_block_count" + input: "action_caller_conditionally_executed_blocks" + input: "action_caller_users" + input: "action_callsite_height" + input: "action_cost_estimate" + input: "action_discount" + input: "action_edge_count" + input: "action_inlining_default" + input: "action_node_count" + input: "action_nr_ctant_params" + input: "action_reward" 
+ input: "action_step_type" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619026" + } + } + } + } + node { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619033" + } + } + } + } + node { + name: "StatefulPartitionedCall_1" + op: "StatefulPartitionedCall" + input: "train_step" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619048" + } + } + } + } + node { + name: "saver_filename" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } + } + node { + name: "StatefulPartitionedCall_2" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "train_step/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + input: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + input: "QNetwork/dense_2/bias/Read/ReadVariableOp" + input: "Const" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_STRING 
+ } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_save_4619143" + } + } + } + } + node { + name: "StatefulPartitionedCall_3" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "train_step" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_restore_4619176" + } + } + } + } + library { + function { + signature { + name: "__inference_signature_wrapper_4619048" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619040" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619029" + } + node_def { + name: "PartitionedCall" + op: 
"PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_722" + } + } + } + experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_action_931" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_3" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 
1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 11 + f: 12 + f: 13 + f: 14 + f: 14 + f: 14 + f: 16 + f: 17 + f: 19 + f: 23 + f: 27 + f: 39 + } 
+ } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + 
shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_4" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 10 + f: 10 + f: 10 + f: 12 + f: 12 + f: 12 + f: 14 + f: 14 + f: 18 + f: 20 + f: 23 + f: 30 + f: 41 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_1/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_1/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_5" + input: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + 
f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 
10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 41 + f: 42 + f: 43 + f: 43 + f: 44 + f: 44 + f: 45 + f: 45 + f: 46 + f: 46 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 51 + f: 52 + f: 53 + f: 53 + f: 54 + f: 55 + f: 56 + f: 57 + f: 57 + f: 58 + f: 59 + f: 60 + f: 61 + f: 61 + f: 63 + f: 63 + f: 64 + f: 65 + f: 66 + f: 67 + f: 67 + f: 69 + f: 70 + f: 71 + f: 72 + f: 73 + f: 74 + f: 75 + f: 77 + f: 78 + f: 79 + f: 80 + f: 81 + f: 82 + f: 83 + f: 85 + f: 86 + f: 88 + f: 89 + f: 91 + f: 92 + f: 94 + f: 96 + f: 97 + f: 99 + f: 100 + f: 101 + f: 103 + f: 105 + f: 107 + f: 109 + f: 111 + f: 113 + f: 115 + f: 118 + f: 121 + f: 123 + f: 126 + f: 128 + f: 130 + f: 133 + f: 135 + f: 137 + f: 140 + f: 143 + f: 146 + f: 148 + f: 151 + f: 154 + f: 157 + f: 161 + f: 163 + f: 166 + f: 169 + f: 173 + f: 178 + f: 183 + f: 189 + f: 193 + f: 197 + f: 202 + f: 208 + f: 213 + f: 218 + f: 223 + f: 228 + f: 233 + f: 239 + f: 245 + f: 250 + f: 257 + f: 262 + f: 269 + f: 277 + f: 284 + f: 292 + f: 300 + f: 308 + f: 319 + f: 329 + f: 340 + f: 349 + f: 359 + f: 371 + f: 382 + f: 394 + f: 410 + f: 423 + f: 435 + f: 445 + f: 462 + f: 480 + f: 492 + f: 506 + f: 519 + f: 536 + f: 557 + f: 577 + f: 598 + f: 622 + f: 655 + f: 679 + f: 707 + f: 733 + f: 751 + f: 787 + f: 814 + f: 847 + f: 897 + f: 934 + f: 997 + f: 1062 + f: 1111 + f: 1181 + f: 1275 + f: 1385 + f: 1465 + f: 1603 + f: 1769 + f: 2057 + f: 2257 + f: 2803 + f: 3468 + f: 4417 + f: 6538 + f: 16126 + f: 23446 + f: 33536 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_2/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_2/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_2/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_6" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + } + } + 
node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 
3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 42 + f: 
43 + f: 43 + f: 44 + f: 45 + f: 45 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 52 + f: 52 + f: 53 + f: 54 + f: 55 + f: 55 + f: 57 + f: 58 + f: 59 + f: 60 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 71 + f: 73 + f: 75 + f: 76 + f: 78 + f: 81 + f: 84 + f: 86 + f: 90 + f: 94 + f: 98 + f: 101 + f: 106 + f: 111 + f: 117 + f: 123 + f: 130 + f: 138 + f: 146 + f: 157 + f: 163 + f: 176 + f: 187 + f: 198 + f: 214 + f: 227 + f: 252 + f: 280 + f: 327 + f: 395 + f: 506 + f: 671 + f: 1025 + f: 1971 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_3/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_3/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: 
"QNetwork/EncodingNetwork/lambda_3/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_7" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 
8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 28 + f: 28 + f: 28 + f: 28 + f: 28 + f: 29 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 31 + f: 32 + f: 32 + f: 32 + f: 32 + f: 32 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 38 + f: 38 + f: 38 + f: 39 + f: 40 + f: 40 + f: 41 + f: 42 + f: 42 + f: 43 + f: 44 + f: 44 + f: 46 + f: 46 + f: 47 + f: 48 + f: 48 + f: 50 + f: 50 + f: 52 + f: 52 + f: 54 + f: 55 + f: 55 + f: 56 + f: 57 + f: 58 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 62 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 72 + f: 74 + f: 77 + f: 80 + f: 82 + f: 86 + f: 89 + f: 92 + f: 96 + f: 99 + f: 104 + f: 108 + f: 114 + f: 119 + f: 125 + f: 131 + f: 139 + f: 146 + f: 157 + f: 167 + f: 176 + f: 188 + f: 198 + f: 215 + f: 236 + f: 262 + f: 306 + f: 376 + f: 462 + f: 596 + f: 942 + f: 1428 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_4/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_4/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Sqrt" 
+ op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_8" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 
1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + 
f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 11 + f: 11 + f: 12 + f: 13 + f: 14 + f: 15 + f: 16 + f: 18 + f: 20 + f: 23 + f: 29 + f: 38 + f: 60 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_5/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_5/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_5/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_9" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + 
f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 
+ f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 34 + f: 35 + f: 37 + f: 38 + f: 40 + f: 46 + f: 51 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_6/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_6/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { 
+ list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_10" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: -15035 + f: -15030 + f: -15025 + f: -15000 + f: -14985 + f: -14945 + f: -14745 + f: -70 + f: -55 + f: -55 + f: -50 + f: -50 + f: -50 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + 
f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + 
f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 55 + f: 55 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 75 + f: 75 + f: 80 + f: 80 + f: 80 + f: 85 + f: 85 + f: 85 + f: 90 + f: 90 + f: 90 + f: 90 + f: 95 + f: 95 + f: 100 + f: 100 + f: 105 + f: 110 + f: 115 + f: 120 + f: 125 + f: 125 + f: 130 + f: 140 + f: 140 + f: 145 + f: 150 + f: 155 + f: 160 + f: 160 + f: 165 + f: 170 + f: 175 + f: 180 + f: 190 + f: 200 + f: 210 
+ f: 215 + f: 220 + f: 220 + f: 230 + f: 235 + f: 245 + f: 250 + f: 260 + f: 275 + f: 290 + f: 305 + f: 325 + f: 350 + f: 370 + f: 390 + f: 425 + f: 460 + f: 500 + f: 560 + f: 650 + f: 790 + f: 1025 + f: 1600 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_7/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_7/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + 
} + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_11" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 18 + f: 29 + f: 39 + f: 48 + f: 57 + f: 64 + f: 70 + f: 76 + f: 82 + f: 87 + f: 92 + f: 97 + f: 101 + f: 105 + f: 109 + f: 113 + f: 116 + f: 120 + f: 123 + f: 127 + f: 130 + f: 134 + f: 137 + f: 140 + f: 143 + f: 146 + f: 149 + f: 152 + f: 156 + f: 159 + f: 162 + f: 165 + f: 168 + f: 171 + f: 174 + f: 177 + f: 180 + f: 183 + f: 186 + f: 188 + f: 191 + f: 194 + f: 197 + f: 200 + f: 203 + f: 205 + f: 208 + f: 211 + f: 214 + f: 217 + f: 219 + f: 222 + f: 225 + f: 228 + f: 231 + f: 233 + f: 236 + f: 239 + f: 242 + f: 244 + f: 247 + f: 250 + f: 253 + f: 255 + f: 258 + f: 261 + f: 264 + f: 266 + f: 269 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 286 + f: 289 + f: 292 + f: 294 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 314 + f: 317 + f: 319 + f: 322 + f: 325 + f: 327 + f: 330 + f: 333 + f: 336 + f: 339 + f: 341 + f: 344 + f: 347 + f: 350 + f: 353 + f: 355 + f: 358 + f: 361 + f: 364 + f: 367 + f: 370 + f: 373 + f: 375 + f: 378 + f: 381 + f: 384 + f: 387 + f: 390 + f: 393 + f: 396 + f: 399 + f: 401 + f: 404 + f: 407 + f: 410 + f: 413 + f: 416 + f: 419 + f: 422 + f: 425 + f: 428 + f: 431 + f: 434 + f: 437 + f: 440 + f: 443 + f: 446 + f: 449 + f: 452 + f: 455 + f: 458 + f: 461 + f: 464 + f: 467 + f: 470 + f: 473 + f: 476 + f: 479 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 501 + f: 504 + f: 507 + f: 511 + f: 514 + f: 517 + f: 520 + f: 523 + f: 526 + f: 530 + f: 533 + f: 536 + f: 539 + f: 542 + f: 545 + f: 549 + f: 552 + f: 555 + f: 558 + f: 562 + f: 565 + f: 569 + f: 572 + f: 575 + f: 579 + f: 582 + f: 585 + f: 589 + f: 592 + f: 595 + f: 599 + f: 602 + f: 605 + f: 609 + f: 612 + f: 616 + f: 620 + f: 623 + f: 626 + f: 630 + f: 634 + f: 637 + f: 641 + f: 644 + f: 648 + f: 651 + f: 655 + f: 658 + f: 662 + f: 665 + f: 669 + f: 672 + f: 676 + f: 680 + f: 683 + f: 687 + f: 691 + f: 694 + f: 698 + f: 702 + f: 705 + f: 709 + f: 712 + f: 716 + f: 720 + f: 724 + f: 727 + f: 731 + f: 735 + f: 739 + f: 742 + f: 746 + f: 750 + f: 754 + f: 758 + f: 761 + f: 765 + f: 769 + f: 773 + f: 777 + f: 780 + f: 784 + f: 788 + f: 792 + f: 796 + f: 800 
+ f: 804 + f: 808 + f: 812 + f: 816 + f: 820 + f: 823 + f: 828 + f: 832 + f: 836 + f: 840 + f: 844 + f: 848 + f: 852 + f: 856 + f: 860 + f: 864 + f: 868 + f: 873 + f: 877 + f: 881 + f: 885 + f: 889 + f: 893 + f: 897 + f: 902 + f: 906 + f: 910 + f: 914 + f: 919 + f: 923 + f: 927 + f: 931 + f: 935 + f: 940 + f: 944 + f: 948 + f: 953 + f: 957 + f: 962 + f: 966 + f: 970 + f: 975 + f: 979 + f: 984 + f: 988 + f: 993 + f: 997 + f: 1002 + f: 1006 + f: 1011 + f: 1015 + f: 1020 + f: 1024 + f: 1029 + f: 1034 + f: 1038 + f: 1043 + f: 1047 + f: 1052 + f: 1057 + f: 1062 + f: 1066 + f: 1071 + f: 1076 + f: 1081 + f: 1086 + f: 1090 + f: 1095 + f: 1100 + f: 1105 + f: 1110 + f: 1114 + f: 1119 + f: 1124 + f: 1129 + f: 1134 + f: 1139 + f: 1144 + f: 1149 + f: 1154 + f: 1159 + f: 1164 + f: 1169 + f: 1174 + f: 1179 + f: 1184 + f: 1189 + f: 1194 + f: 1199 + f: 1204 + f: 1209 + f: 1215 + f: 1220 + f: 1225 + f: 1230 + f: 1235 + f: 1241 + f: 1246 + f: 1251 + f: 1257 + f: 1262 + f: 1267 + f: 1273 + f: 1278 + f: 1284 + f: 1289 + f: 1294 + f: 1300 + f: 1305 + f: 1311 + f: 1316 + f: 1322 + f: 1327 + f: 1333 + f: 1338 + f: 1344 + f: 1350 + f: 1355 + f: 1361 + f: 1367 + f: 1372 + f: 1378 + f: 1383 + f: 1389 + f: 1395 + f: 1401 + f: 1407 + f: 1413 + f: 1418 + f: 1424 + f: 1430 + f: 1436 + f: 1442 + f: 1448 + f: 1454 + f: 1459 + f: 1465 + f: 1472 + f: 1477 + f: 1483 + f: 1489 + f: 1495 + f: 1501 + f: 1507 + f: 1514 + f: 1520 + f: 1526 + f: 1532 + f: 1538 + f: 1545 + f: 1551 + f: 1557 + f: 1564 + f: 1570 + f: 1576 + f: 1583 + f: 1589 + f: 1596 + f: 1602 + f: 1608 + f: 1615 + f: 1621 + f: 1628 + f: 1634 + f: 1641 + f: 1647 + f: 1654 + f: 1661 + f: 1667 + f: 1674 + f: 1681 + f: 1687 + f: 1694 + f: 1701 + f: 1708 + f: 1715 + f: 1722 + f: 1729 + f: 1735 + f: 1742 + f: 1749 + f: 1756 + f: 1763 + f: 1770 + f: 1777 + f: 1784 + f: 1791 + f: 1798 + f: 1806 + f: 1812 + f: 1820 + f: 1827 + f: 1835 + f: 1841 + f: 1849 + f: 1856 + f: 1863 + f: 1871 + f: 1878 + f: 1885 + f: 1893 + f: 1901 + f: 1908 + f: 1915 + f: 1923 + f: 1930 + f: 1938 + f: 1946 + f: 1953 + f: 1961 + f: 1969 + f: 1976 + f: 1984 + f: 1992 + f: 2000 + f: 2007 + f: 2015 + f: 2023 + f: 2031 + f: 2039 + f: 2047 + f: 2055 + f: 2063 + f: 2071 + f: 2079 + f: 2087 + f: 2095 + f: 2104 + f: 2112 + f: 2120 + f: 2128 + f: 2137 + f: 2146 + f: 2154 + f: 2162 + f: 2171 + f: 2179 + f: 2188 + f: 2197 + f: 2205 + f: 2214 + f: 2223 + f: 2232 + f: 2241 + f: 2250 + f: 2258 + f: 2268 + f: 2277 + f: 2285 + f: 2294 + f: 2304 + f: 2313 + f: 2322 + f: 2331 + f: 2340 + f: 2350 + f: 2359 + f: 2368 + f: 2378 + f: 2388 + f: 2397 + f: 2407 + f: 2416 + f: 2426 + f: 2436 + f: 2446 + f: 2455 + f: 2465 + f: 2475 + f: 2485 + f: 2495 + f: 2505 + f: 2515 + f: 2525 + f: 2535 + f: 2545 + f: 2556 + f: 2566 + f: 2577 + f: 2587 + f: 2598 + f: 2609 + f: 2620 + f: 2631 + f: 2641 + f: 2652 + f: 2663 + f: 2674 + f: 2685 + f: 2696 + f: 2708 + f: 2719 + f: 2730 + f: 2742 + f: 2753 + f: 2764 + f: 2776 + f: 2788 + f: 2799 + f: 2811 + f: 2823 + f: 2835 + f: 2847 + f: 2858 + f: 2870 + f: 2882 + f: 2894 + f: 2906 + f: 2919 + f: 2931 + f: 2943 + f: 2956 + f: 2968 + f: 2981 + f: 2994 + f: 3006 + f: 3019 + f: 3032 + f: 3045 + f: 3058 + f: 3070 + f: 3083 + f: 3096 + f: 3109 + f: 3121 + f: 3134 + f: 3148 + f: 3161 + f: 3174 + f: 3187 + f: 3200 + f: 3214 + f: 3228 + f: 3242 + f: 3255 + f: 3268 + f: 3283 + f: 3297 + f: 3310 + f: 3325 + f: 3340 + f: 3353 + f: 3368 + f: 3383 + f: 3398 + f: 3412 + f: 3427 + f: 3442 + f: 3457 + f: 3471 + f: 3487 + f: 3502 + f: 3516 + f: 3531 + f: 3546 + f: 3561 + f: 3577 + f: 3593 + f: 3608 + f: 3625 + 
f: 3641 + f: 3657 + f: 3673 + f: 3690 + f: 3706 + f: 3722 + f: 3738 + f: 3755 + f: 3772 + f: 3789 + f: 3805 + f: 3823 + f: 3839 + f: 3856 + f: 3873 + f: 3891 + f: 3908 + f: 3926 + f: 3944 + f: 3960 + f: 3977 + f: 3995 + f: 4013 + f: 4031 + f: 4048 + f: 4067 + f: 4085 + f: 4104 + f: 4122 + f: 4140 + f: 4159 + f: 4177 + f: 4196 + f: 4215 + f: 4234 + f: 4253 + f: 4272 + f: 4291 + f: 4311 + f: 4332 + f: 4351 + f: 4371 + f: 4391 + f: 4412 + f: 4433 + f: 4454 + f: 4474 + f: 4496 + f: 4518 + f: 4538 + f: 4558 + f: 4579 + f: 4601 + f: 4619 + f: 4640 + f: 4662 + f: 4684 + f: 4706 + f: 4728 + f: 4751 + f: 4771 + f: 4794 + f: 4818 + f: 4840 + f: 4863 + f: 4887 + f: 4910 + f: 4933 + f: 4956 + f: 4980 + f: 5004 + f: 5028 + f: 5052 + f: 5076 + f: 5100 + f: 5125 + f: 5152 + f: 5175 + f: 5200 + f: 5226 + f: 5251 + f: 5278 + f: 5304 + f: 5329 + f: 5354 + f: 5381 + f: 5407 + f: 5433 + f: 5460 + f: 5488 + f: 5516 + f: 5544 + f: 5573 + f: 5600 + f: 5628 + f: 5656 + f: 5684 + f: 5713 + f: 5741 + f: 5771 + f: 5799 + f: 5830 + f: 5860 + f: 5891 + f: 5921 + f: 5951 + f: 5980 + f: 6010 + f: 6041 + f: 6073 + f: 6105 + f: 6133 + f: 6163 + f: 6195 + f: 6227 + f: 6258 + f: 6291 + f: 6322 + f: 6356 + f: 6390 + f: 6424 + f: 6457 + f: 6491 + f: 6527 + f: 6561 + f: 6596 + f: 6631 + f: 6665 + f: 6701 + f: 6736 + f: 6771 + f: 6805 + f: 6840 + f: 6877 + f: 6911 + f: 6947 + f: 6985 + f: 7022 + f: 7059 + f: 7097 + f: 7135 + f: 7174 + f: 7212 + f: 7251 + f: 7289 + f: 7327 + f: 7366 + f: 7406 + f: 7447 + f: 7486 + f: 7525 + f: 7566 + f: 7606 + f: 7646 + f: 7688 + f: 7728 + f: 7771 + f: 7814 + f: 7859 + f: 7901 + f: 7949 + f: 7992 + f: 8036 + f: 8082 + f: 8127 + f: 8173 + f: 8218 + f: 8262 + f: 8309 + f: 8353 + f: 8397 + f: 8444 + f: 8489 + f: 8539 + f: 8585 + f: 8632 + f: 8682 + f: 8727 + f: 8777 + f: 8828 + f: 8879 + f: 8929 + f: 8982 + f: 9037 + f: 9087 + f: 9140 + f: 9193 + f: 9250 + f: 9305 + f: 9361 + f: 9418 + f: 9475 + f: 9532 + f: 9589 + f: 9644 + f: 9699 + f: 9758 + f: 9818 + f: 9875 + f: 9935 + f: 9997 + f: 10057 + f: 10117 + f: 10174 + f: 10232 + f: 10296 + f: 10356 + f: 10419 + f: 10482 + f: 10546 + f: 10608 + f: 10670 + f: 10729 + f: 10790 + f: 10855 + f: 10920 + f: 10990 + f: 11054 + f: 11118 + f: 11181 + f: 11248 + f: 11316 + f: 11385 + f: 11454 + f: 11526 + f: 11597 + f: 11667 + f: 11740 + f: 11820 + f: 11897 + f: 11973 + f: 12046 + f: 12126 + f: 12204 + f: 12287 + f: 12370 + f: 12456 + f: 12538 + f: 12627 + f: 12714 + f: 12799 + f: 12883 + f: 12971 + f: 13062 + f: 13154 + f: 13233 + f: 13328 + f: 13418 + f: 13511 + f: 13607 + f: 13709 + f: 13806 + f: 13903 + f: 14002 + f: 14104 + f: 14200 + f: 14288 + f: 14391 + f: 14488 + f: 14590 + f: 14698 + f: 14808 + f: 14910 + f: 15020 + f: 15126 + f: 15238 + f: 15347 + f: 15456 + f: 15574 + f: 15692 + f: 15786 + f: 15896 + f: 16016 + f: 16136 + f: 16250 + f: 16352 + f: 16474 + f: 16575 + f: 16702 + f: 16835 + f: 16965 + f: 17096 + f: 17232 + f: 17370 + f: 17443 + f: 17581 + f: 17719 + f: 17864 + f: 17976 + f: 18116 + f: 18250 + f: 18396 + f: 18540 + f: 18690 + f: 18840 + f: 18989 + f: 19136 + f: 19294 + f: 19445 + f: 19589 + f: 19750 + f: 19905 + f: 20064 + f: 20191 + f: 20325 + f: 20497 + f: 20662 + f: 20833 + f: 20981 + f: 21152 + f: 21334 + f: 21510 + f: 21642 + f: 21821 + f: 22001 + f: 22186 + f: 22379 + f: 22568 + f: 22770 + f: 22958 + f: 23162 + f: 23360 + f: 23524 + f: 23737 + f: 23960 + f: 24175 + f: 24395 + f: 24631 + f: 24865 + f: 25091 + f: 25327 + f: 25580 + f: 25833 + f: 26089 + f: 26361 + f: 26636 + f: 26889 + f: 27155 + f: 27436 + f: 27715 + f: 28003 + f: 
28303 + f: 28600 + f: 28916 + f: 29223 + f: 29553 + f: 29884 + f: 30200 + f: 30538 + f: 30868 + f: 31211 + f: 31548 + f: 31881 + f: 32253 + f: 32605 + f: 32980 + f: 33385 + f: 33805 + f: 34254 + f: 34723 + f: 35167 + f: 35666 + f: 36125 + f: 36652 + f: 37177 + f: 37739 + f: 38321 + f: 38932 + f: 39640 + f: 40337 + f: 41000 + f: 41626 + f: 42385 + f: 43122 + f: 43890 + f: 44687 + f: 45609 + f: 46520 + f: 47489 + f: 48432 + f: 49458 + f: 50511 + f: 51561 + f: 52568 + f: 53676 + f: 54936 + f: 56071 + f: 57302 + f: 58513 + f: 59800 + f: 61192 + f: 62702 + f: 64205 + f: 65868 + f: 67780 + f: 69960 + f: 72330 + f: 74918 + f: 77540 + f: 80344 + f: 83727 + f: 87662 + f: 93589 + f: 101441 + f: 110544 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_8/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_8/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_8/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_12" + input: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + float_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_13" + input: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 13 + f: 38 + f: 56 + f: 70 + f: 82 + f: 94 + f: 104 + f: 114 + f: 123 + f: 131 + f: 
139 + f: 148 + f: 152 + f: 153 + f: 158 + f: 163 + f: 170 + f: 174 + f: 178 + f: 180 + f: 183 + f: 186 + f: 188 + f: 190 + f: 192 + f: 196 + f: 198 + f: 201 + f: 205 + f: 208 + f: 212 + f: 215 + f: 219 + f: 221 + f: 225 + f: 227 + f: 229 + f: 232 + f: 233 + f: 236 + f: 239 + f: 242 + f: 245 + f: 248 + f: 250 + f: 252 + f: 254 + f: 256 + f: 259 + f: 261 + f: 264 + f: 267 + f: 270 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 285 + f: 287 + f: 290 + f: 293 + f: 295 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 313 + f: 316 + f: 319 + f: 322 + f: 325 + f: 329 + f: 331 + f: 333 + f: 336 + f: 338 + f: 340 + f: 343 + f: 345 + f: 347 + f: 347 + f: 349 + f: 351 + f: 353 + f: 355 + f: 357 + f: 359 + f: 361 + f: 363 + f: 365 + f: 368 + f: 369 + f: 371 + f: 373 + f: 375 + f: 377 + f: 380 + f: 382 + f: 385 + f: 387 + f: 389 + f: 391 + f: 394 + f: 396 + f: 398 + f: 400 + f: 403 + f: 405 + f: 408 + f: 410 + f: 412 + f: 415 + f: 417 + f: 420 + f: 422 + f: 425 + f: 427 + f: 429 + f: 432 + f: 434 + f: 437 + f: 439 + f: 442 + f: 444 + f: 446 + f: 449 + f: 451 + f: 454 + f: 456 + f: 458 + f: 461 + f: 463 + f: 466 + f: 469 + f: 472 + f: 474 + f: 476 + f: 479 + f: 482 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 500 + f: 503 + f: 505 + f: 508 + f: 510 + f: 513 + f: 516 + f: 519 + f: 522 + f: 524 + f: 528 + f: 530 + f: 533 + f: 536 + f: 539 + f: 541 + f: 544 + f: 547 + f: 550 + f: 553 + f: 556 + f: 559 + f: 561 + f: 563 + f: 567 + f: 570 + f: 572 + f: 575 + f: 577 + f: 580 + f: 584 + f: 586 + f: 589 + f: 592 + f: 595 + f: 598 + f: 601 + f: 605 + f: 607 + f: 611 + f: 613 + f: 617 + f: 620 + f: 623 + f: 626 + f: 629 + f: 632 + f: 635 + f: 639 + f: 642 + f: 645 + f: 648 + f: 651 + f: 654 + f: 657 + f: 660 + f: 662 + f: 666 + f: 669 + f: 672 + f: 676 + f: 679 + f: 682 + f: 685 + f: 688 + f: 690 + f: 693 + f: 696 + f: 699 + f: 702 + f: 705 + f: 709 + f: 712 + f: 714 + f: 718 + f: 721 + f: 724 + f: 726 + f: 728 + f: 729 + f: 731 + f: 734 + f: 737 + f: 741 + f: 745 + f: 748 + f: 750 + f: 753 + f: 756 + f: 760 + f: 763 + f: 766 + f: 770 + f: 773 + f: 776 + f: 779 + f: 782 + f: 786 + f: 788 + f: 793 + f: 796 + f: 798 + f: 802 + f: 805 + f: 808 + f: 811 + f: 815 + f: 818 + f: 820 + f: 824 + f: 827 + f: 829 + f: 832 + f: 835 + f: 838 + f: 842 + f: 846 + f: 849 + f: 854 + f: 857 + f: 860 + f: 864 + f: 867 + f: 871 + f: 875 + f: 879 + f: 882 + f: 887 + f: 890 + f: 893 + f: 897 + f: 901 + f: 905 + f: 908 + f: 911 + f: 915 + f: 918 + f: 921 + f: 925 + f: 929 + f: 932 + f: 934 + f: 937 + f: 940 + f: 943 + f: 946 + f: 950 + f: 953 + f: 956 + f: 961 + f: 965 + f: 969 + f: 973 + f: 976 + f: 980 + f: 982 + f: 985 + f: 990 + f: 994 + f: 997 + f: 1001 + f: 1005 + f: 1007 + f: 1010 + f: 1014 + f: 1018 + f: 1022 + f: 1025 + f: 1028 + f: 1033 + f: 1035 + f: 1038 + f: 1042 + f: 1047 + f: 1052 + f: 1056 + f: 1060 + f: 1063 + f: 1067 + f: 1071 + f: 1075 + f: 1079 + f: 1083 + f: 1086 + f: 1088 + f: 1092 + f: 1097 + f: 1102 + f: 1106 + f: 1109 + f: 1113 + f: 1117 + f: 1120 + f: 1125 + f: 1129 + f: 1134 + f: 1137 + f: 1142 + f: 1146 + f: 1150 + f: 1151 + f: 1155 + f: 1159 + f: 1162 + f: 1166 + f: 1170 + f: 1174 + f: 1177 + f: 1181 + f: 1185 + f: 1188 + f: 1193 + f: 1196 + f: 1203 + f: 1207 + f: 1212 + f: 1214 + f: 1217 + f: 1220 + f: 1222 + f: 1222 + f: 1226 + f: 1229 + f: 1233 + f: 1237 + f: 1241 + f: 1246 + f: 1250 + f: 1253 + f: 1257 + f: 1262 + f: 1267 + f: 1272 + f: 1278 + f: 1283 + f: 1287 + f: 1293 + f: 1297 + f: 1301 + f: 1304 + f: 1309 + f: 1315 + f: 1320 + f: 1325 + f: 1329 + f: 1333 + f: 1336 + f: 
1341 + f: 1344 + f: 1348 + f: 1351 + f: 1357 + f: 1363 + f: 1368 + f: 1374 + f: 1379 + f: 1383 + f: 1386 + f: 1391 + f: 1395 + f: 1399 + f: 1403 + f: 1407 + f: 1410 + f: 1415 + f: 1418 + f: 1423 + f: 1428 + f: 1432 + f: 1436 + f: 1438 + f: 1442 + f: 1446 + f: 1450 + f: 1454 + f: 1462 + f: 1467 + f: 1472 + f: 1477 + f: 1483 + f: 1488 + f: 1492 + f: 1496 + f: 1503 + f: 1508 + f: 1513 + f: 1518 + f: 1520 + f: 1526 + f: 1531 + f: 1534 + f: 1538 + f: 1542 + f: 1546 + f: 1552 + f: 1558 + f: 1564 + f: 1568 + f: 1573 + f: 1578 + f: 1581 + f: 1590 + f: 1596 + f: 1601 + f: 1606 + f: 1611 + f: 1616 + f: 1622 + f: 1629 + f: 1634 + f: 1640 + f: 1647 + f: 1651 + f: 1657 + f: 1660 + f: 1665 + f: 1672 + f: 1678 + f: 1686 + f: 1692 + f: 1698 + f: 1704 + f: 1709 + f: 1714 + f: 1719 + f: 1724 + f: 1730 + f: 1737 + f: 1744 + f: 1751 + f: 1755 + f: 1761 + f: 1764 + f: 1772 + f: 1778 + f: 1784 + f: 1789 + f: 1799 + f: 1804 + f: 1811 + f: 1819 + f: 1825 + f: 1830 + f: 1838 + f: 1849 + f: 1858 + f: 1862 + f: 1868 + f: 1872 + f: 1878 + f: 1885 + f: 1888 + f: 1892 + f: 1897 + f: 1902 + f: 1907 + f: 1919 + f: 1926 + f: 1932 + f: 1936 + f: 1941 + f: 1946 + f: 1952 + f: 1960 + f: 1968 + f: 1977 + f: 1985 + f: 1992 + f: 1997 + f: 2006 + f: 2012 + f: 2018 + f: 2026 + f: 2034 + f: 2044 + f: 2050 + f: 2057 + f: 2064 + f: 2069 + f: 2075 + f: 2082 + f: 2091 + f: 2098 + f: 2107 + f: 2122 + f: 2126 + f: 2135 + f: 2146 + f: 2149 + f: 2157 + f: 2163 + f: 2172 + f: 2178 + f: 2184 + f: 2191 + f: 2198 + f: 2208 + f: 2216 + f: 2223 + f: 2235 + f: 2242 + f: 2252 + f: 2263 + f: 2272 + f: 2277 + f: 2288 + f: 2296 + f: 2306 + f: 2311 + f: 2318 + f: 2323 + f: 2334 + f: 2341 + f: 2356 + f: 2366 + f: 2373 + f: 2379 + f: 2386 + f: 2407 + f: 2416 + f: 2423 + f: 2432 + f: 2438 + f: 2448 + f: 2453 + f: 2464 + f: 2473 + f: 2473 + f: 2481 + f: 2492 + f: 2504 + f: 2511 + f: 2523 + f: 2529 + f: 2537 + f: 2545 + f: 2556 + f: 2566 + f: 2575 + f: 2584 + f: 2592 + f: 2602 + f: 2613 + f: 2624 + f: 2636 + f: 2643 + f: 2647 + f: 2652 + f: 2664 + f: 2675 + f: 2688 + f: 2693 + f: 2702 + f: 2709 + f: 2722 + f: 2739 + f: 2754 + f: 2766 + f: 2776 + f: 2786 + f: 2799 + f: 2810 + f: 2832 + f: 2840 + f: 2849 + f: 2860 + f: 2873 + f: 2889 + f: 2908 + f: 2914 + f: 2926 + f: 2939 + f: 2950 + f: 2961 + f: 2969 + f: 2978 + f: 2990 + f: 2999 + f: 3023 + f: 3032 + f: 3049 + f: 3066 + f: 3085 + f: 3101 + f: 3107 + f: 3117 + f: 3129 + f: 3144 + f: 3167 + f: 3190 + f: 3212 + f: 3229 + f: 3238 + f: 3264 + f: 3293 + f: 3302 + f: 3309 + f: 3314 + f: 3323 + f: 3344 + f: 3352 + f: 3362 + f: 3390 + f: 3400 + f: 3411 + f: 3435 + f: 3456 + f: 3470 + f: 3485 + f: 3498 + f: 3505 + f: 3519 + f: 3539 + f: 3545 + f: 3545 + f: 3560 + f: 3576 + f: 3597 + f: 3607 + f: 3621 + f: 3641 + f: 3665 + f: 3679 + f: 3701 + f: 3714 + f: 3733 + f: 3741 + f: 3745 + f: 3757 + f: 3773 + f: 3787 + f: 3795 + f: 3805 + f: 3822 + f: 3835 + f: 3844 + f: 3861 + f: 3872 + f: 3878 + f: 3897 + f: 3919 + f: 3941 + f: 3971 + f: 4004 + f: 4014 + f: 4019 + f: 4061 + f: 4068 + f: 4089 + f: 4108 + f: 4117 + f: 4125 + f: 4146 + f: 4165 + f: 4194 + f: 4204 + f: 4224 + f: 4236 + f: 4263 + f: 4290 + f: 4301 + f: 4319 + f: 4326 + f: 4347 + f: 4369 + f: 4386 + f: 4413 + f: 4435 + f: 4451 + f: 4451 + f: 4451 + f: 4476 + f: 4500 + f: 4539 + f: 4579 + f: 4592 + f: 4600 + f: 4622 + f: 4650 + f: 4683 + f: 4714 + f: 4742 + f: 4755 + f: 4771 + f: 4788 + f: 4816 + f: 4828 + f: 4831 + f: 4831 + f: 4831 + f: 4843 + f: 4852 + f: 4865 + f: 4896 + f: 4915 + f: 4931 + f: 4952 + f: 4965 + f: 4983 + f: 5007 + f: 5043 + f: 5061 + f: 5081 
+ f: 5095 + f: 5122 + f: 5143 + f: 5171 + f: 5204 + f: 5226 + f: 5233 + f: 5250 + f: 5281 + f: 5320 + f: 5323 + f: 5328 + f: 5345 + f: 5374 + f: 5413 + f: 5466 + f: 5492 + f: 5524 + f: 5555 + f: 5567 + f: 5610 + f: 5676 + f: 5701 + f: 5716 + f: 5744 + f: 5768 + f: 5795 + f: 5818 + f: 5854 + f: 5906 + f: 5934 + f: 5960 + f: 5975 + f: 5993 + f: 6025 + f: 6034 + f: 6051 + f: 6082 + f: 6106 + f: 6125 + f: 6159 + f: 6187 + f: 6242 + f: 6287 + f: 6311 + f: 6332 + f: 6348 + f: 6358 + f: 6368 + f: 6377 + f: 6402 + f: 6407 + f: 6428 + f: 6450 + f: 6475 + f: 6498 + f: 6505 + f: 6533 + f: 6565 + f: 6580 + f: 6595 + f: 6611 + f: 6654 + f: 6658 + f: 6705 + f: 6751 + f: 6786 + f: 6828 + f: 6876 + f: 6896 + f: 6948 + f: 6964 + f: 7065 + f: 7082 + f: 7118 + f: 7184 + f: 7214 + f: 7271 + f: 7310 + f: 7357 + f: 7405 + f: 7506 + f: 7613 + f: 7641 + f: 7675 + f: 7720 + f: 7781 + f: 7833 + f: 7860 + f: 7898 + f: 7929 + f: 8044 + f: 8104 + f: 8148 + f: 8236 + f: 8273 + f: 8313 + f: 8349 + f: 8381 + f: 8409 + f: 8498 + f: 8507 + f: 8524 + f: 8570 + f: 8607 + f: 8630 + f: 8637 + f: 8675 + f: 8700 + f: 8714 + f: 8734 + f: 8776 + f: 8836 + f: 8854 + f: 8867 + f: 8868 + f: 9065 + f: 9113 + f: 9121 + f: 9241 + f: 9357 + f: 9360 + f: 9585 + f: 9613 + f: 9684 + f: 9727 + f: 9751 + f: 9777 + f: 9802 + f: 9889 + f: 9903 + f: 9914 + f: 9978 + f: 10061 + f: 10192 + f: 10213 + f: 10345 + f: 10369 + f: 10404 + f: 10430 + f: 10471 + f: 10481 + f: 10489 + f: 10492 + f: 10494 + f: 10524 + f: 10554 + f: 10557 + f: 10560 + f: 10562 + f: 10641 + f: 10716 + f: 10842 + f: 10897 + f: 10967 + f: 11053 + f: 11128 + f: 11137 + f: 11328 + f: 11336 + f: 11401 + f: 11532 + f: 11573 + f: 11860 + f: 11880 + f: 12013 + f: 12305 + f: 12358 + f: 12386 + f: 12404 + f: 12456 + f: 12456 + f: 12476 + f: 12615 + f: 12677 + f: 12981 + f: 13094 + f: 13197 + f: 13708 + f: 13717 + f: 13788 + f: 14049 + f: 14112 + f: 14224 + f: 14257 + f: 14681 + f: 14901 + f: 15006 + f: 15071 + f: 15100 + f: 15248 + f: 15669 + f: 15877 + f: 15953 + f: 15953 + f: 16066 + f: 16072 + f: 16271 + f: 16292 + f: 16386 + f: 16490 + f: 16633 + f: 16670 + f: 16834 + f: 16896 + f: 17543 + f: 17693 + f: 17800 + f: 17859 + f: 18397 + f: 18811 + f: 18826 + f: 18971 + f: 19304 + f: 19319 + f: 19695 + f: 20378 + f: 20865 + f: 21313 + f: 21330 + f: 22321 + f: 22760 + f: 22770 + f: 23783 + f: 23785 + f: 24525 + f: 24844 + f: 24848 + f: 24964 + f: 24966 + f: 27468 + f: 27478 + f: 27555 + f: 27555 + f: 28215 + f: 28219 + f: 28336 + f: 28490 + f: 30213 + f: 30228 + f: 30242 + f: 34116 + f: 43518 + f: 43518 + f: 43518 + f: 43852 + f: 43852 + f: 43852 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_10/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + 
original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_10/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_14" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_11/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 4 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_11/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_11/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list 
{ + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_9/zeros_like:output:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat:output:0" + input: "QNetwork/EncodingNetwork/concatenate/concat/axis:output:0" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\"\000\000\000" + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/flatten/Const" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Reshape" + op: "Reshape" + input: "QNetwork/EncodingNetwork/concatenate/concat:output:0" + input: "QNetwork/EncodingNetwork/flatten/Const:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/flatten/Reshape" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/flatten/Reshape:output:0" + input: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + 
} + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/Relu" + op: "Relu" + input: "QNetwork/EncodingNetwork/dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/Relu" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense/Relu:activations:0" + input: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense_1/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/Relu" + 
op: "Relu" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/Relu" + } + } + node_def { + name: "QNetwork/dense_2/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense_1/Relu:activations:0" + input: "QNetwork/dense_2/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd" + op: "BiasAdd" + input: "QNetwork/dense_2/MatMul:product:0" + input: "QNetwork/dense_2/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax/dimension" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax/dimension" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax" + op: "ArgMax" + input: "QNetwork/dense_2/BiasAdd:output:0" + input: "ShiftedCategorical_1/mode/ArgMax/dimension:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax" + } + } + node_def { + name: "add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "add/y" + } + } + node_def { + name: "add" + op: "AddV2" + input: "ShiftedCategorical_1/mode/ArgMax:output:0" + input: "add/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + 
experimental_debug_info { + original_node_names: "add" + } + } + node_def { + name: "Deterministic/atol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic/atol" + } + } + node_def { + name: "Deterministic/rtol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic/rtol" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape/x" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape" + op: "Cast" + input: "Deterministic_1/sample/sample_shape/x:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_1" + } + } + node_def { + name: "Deterministic_1/sample/Shape_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_2" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape_1:output:0" + input: "Deterministic_1/sample/Shape_2:output:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs_1" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape:output:0" + input: "Deterministic_1/sample/BroadcastArgs:r0:0" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs_1" + } + } + node_def { + name: "Deterministic_1/sample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Const" + } + } + node_def { + name: "Deterministic_1/sample/concat/values_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/values_0" + } + } + node_def { + name: "Deterministic_1/sample/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat" + op: "ConcatV2" + input: "Deterministic_1/sample/concat/values_0:output:0" + input: "Deterministic_1/sample/BroadcastArgs_1:r0:0" + input: "Deterministic_1/sample/Const:output:0" + input: "Deterministic_1/sample/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastTo" + op: "BroadcastTo" + input: "add:z:0" + input: "Deterministic_1/sample/concat:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastTo" + } + } + node_def { + name: "Deterministic_1/sample/Shape_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\001\000\000\000" + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_3" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" 
+ value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_1" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_2" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice" + op: "StridedSlice" + input: "Deterministic_1/sample/Shape_3:output:0" + input: "Deterministic_1/sample/strided_slice/stack:output:0" + input: "Deterministic_1/sample/strided_slice/stack_1:output:0" + input: "Deterministic_1/sample/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice" + } + } + node_def { + name: "Deterministic_1/sample/concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat_1/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat_1" + op: "ConcatV2" + input: "Deterministic_1/sample/sample_shape:y:0" + input: "Deterministic_1/sample/strided_slice:output:0" + input: "Deterministic_1/sample/concat_1/axis:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat_1" + } + } + node_def { + name: "Deterministic_1/sample/Reshape" + op: "Reshape" + input: "Deterministic_1/sample/BroadcastTo:output:0" + input: "Deterministic_1/sample/concat_1:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Reshape" + } + } + node_def { + name: "clip_by_value/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/Minimum/y" + } + } + node_def { + name: "clip_by_value/Minimum" + op: "Minimum" + input: "Deterministic_1/sample/Reshape:output:0" + input: "clip_by_value/Minimum/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/Minimum" + } + } + node_def { + name: "clip_by_value/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/y" + } + } + node_def { + name: "clip_by_value" + op: "Maximum" + input: "clip_by_value/Minimum:z:0" + input: "clip_by_value/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "clip_by_value:z:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + 
arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619033" + } + node_def { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619029" + } + } + } + experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference__traced_save_4619143" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "savev2_train_step_read_readvariableop" + type: DT_INT64 + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" 
+ type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_1_const" + type: DT_STRING + } + output_arg { + name: "identity_1" + type: DT_STRING + } + is_stateful: true + control_output: "MergeV2Checkpoints" + control_output: "SaveV2" + control_output: "SaveV2_1" + } + node_def { + name: "StaticRegexFullMatch" + op: "StaticRegexFullMatch" + input: "file_prefix" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "pattern" + value { + s: "^s3://.*" + } + } + experimental_debug_info { + original_node_names: "StaticRegexFullMatch" + } + } + node_def { + name: "Const" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: ".part" + } + } + } + experimental_debug_info { + original_node_names: "Const" + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_f4c8d2e64931472295be68a11e57e937/part" + } + } + } + experimental_debug_info { + original_node_names: "Const_1" + } + } + node_def { + name: "Select" + op: "Select" + input: "StaticRegexFullMatch:output:0" + input: "Const:output:0" + input: "Const_1:output:0" + device: "/device:CPU:*" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Select" + } + } + node_def { + name: "StringJoin" + op: "StringJoin" + input: "file_prefix" + input: "Select:output:0" + device: "/device:CPU:*" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "StringJoin" + } + } + node_def { + name: "num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "num_shards" + } + } + node_def { + name: "ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename/shard" + } + } + node_def { + name: "ShardedFilename" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: 
"ShardedFilename/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename" + } + } + node_def { + name: "SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/tensor_names" + } + } + node_def { + name: "SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/shape_and_slices" + } + } + node_def { + name: "SaveV2" + op: "SaveV2" + input: "ShardedFilename:filename:0" + input: "SaveV2/tensor_names:output:0" + input: "SaveV2/shape_and_slices:output:0" + input: "savev2_train_step_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + input: "savev2_qnetwork_dense_2_kernel_read_readvariableop" + input: "savev2_qnetwork_dense_2_bias_read_readvariableop" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "SaveV2" + } + } + node_def { + name: "ShardedFilename_1/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1/shard" + } + } + node_def { + name: "ShardedFilename_1" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename_1/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1" + } + } + node_def { + name: "SaveV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/tensor_names" + } + } + node_def { + name: "SaveV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/shape_and_slices" + } + } + node_def { + name: "SaveV2_1" + op: "SaveV2" + input: "ShardedFilename_1:filename:0" + input: "SaveV2_1/tensor_names:output:0" + input: "SaveV2_1/shape_and_slices:output:0" + input: "savev2_1_const" + input: "^SaveV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1" + } + } + node_def { + name: "MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "ShardedFilename:filename:0" + input: "ShardedFilename_1:filename:0" + input: "^SaveV2" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints/checkpoint_prefixes" + } + } + node_def { + name: "MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "MergeV2Checkpoints/checkpoint_prefixes:output:0" + input: "file_prefix" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "file_prefix" + input: "^MergeV2Checkpoints" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "Identity:output:0" + input: "^MergeV2Checkpoints" + input: "^SaveV2" + input: "^SaveV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + ret { + key: "identity_1" + value: "Identity_1:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + } + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + shape { + dim { + size: 2 + } + } + shape { + } + } + } + } + control_ret { + key: "MergeV2Checkpoints" + value: "MergeV2Checkpoints" + } + control_ret { + key: "SaveV2" + value: "SaveV2" + } + control_ret { + key: "SaveV2_1" + value: "SaveV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" 
+ value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_722" + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619026" + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { 
+ type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4618993" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + 
key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4618993" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + 
name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_polymorphic_action_fn_4618978" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + 
shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { 
+ key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4619080" + input_arg { + name: "time_step_step_type" + type: DT_INT32 + } + input_arg { + name: "time_step_reward" + type: DT_FLOAT + } + input_arg { + name: "time_step_discount" + type: DT_FLOAT + } + input_arg { + name: "time_step_observation_callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callee_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callsite_height" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_cost_estimate" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_edge_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_inlining_default" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_node_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step_step_type" + input: "time_step_reward" + input: "time_step_discount" + input: "time_step_observation_callee_basic_block_count" + input: "time_step_observation_callee_conditionally_executed_blocks" + input: "time_step_observation_callee_users" + input: "time_step_observation_caller_basic_block_count" + input: "time_step_observation_caller_conditionally_executed_blocks" + input: "time_step_observation_caller_users" + input: "time_step_observation_callsite_height" + input: "time_step_observation_cost_estimate" + input: "time_step_observation_edge_count" + input: "time_step_observation_inlining_default" + input: "time_step_observation_node_count" + input: "time_step_observation_nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: 
DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + 
key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619040" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: 
DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_<lambda>_728" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4618978" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step" + input: "time_step_1" + input: "time_step_2" + input: "time_step_3" + input: "time_step_4" + input: "time_step_5" + input: "time_step_6" + input: "time_step_7" + input: "time_step_8" + input: "time_step_9" + input: "time_step_10" + input: "time_step_11" + input: "time_step_12" + input: "time_step_13" + input: "time_step_14" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: 
DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + 
attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_946" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: 
"unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" 
+ value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__traced_restore_4619176" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "assignvariableop_train_step" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_5_qnetwork_dense_2_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_6_qnetwork_dense_2_bias" + type: DT_RESOURCE + } + output_arg { + name: "identity_8" + type: DT_STRING + } + is_stateful: true + control_output: "AssignVariableOp" + control_output: "AssignVariableOp_1" + control_output: "AssignVariableOp_2" + control_output: "AssignVariableOp_3" + control_output: "AssignVariableOp_4" + control_output: "AssignVariableOp_5" + control_output: "AssignVariableOp_6" + control_output: "RestoreV2" + control_output: "RestoreV2_1" + } + node_def { + name: "RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/tensor_names" + } + } + node_def { + name: "RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/shape_and_slices" + } + } + node_def { + name: "RestoreV2" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2/tensor_names:output:0" + input: "RestoreV2/shape_and_slices:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + 
unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "RestoreV2:tensors:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "assignvariableop_train_step" + input: "Identity:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "RestoreV2:tensors:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + node_def { + name: "AssignVariableOp_1" + op: "AssignVariableOp" + input: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + input: "Identity_1:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_1" + } + } + node_def { + name: "Identity_2" + op: "Identity" + input: "RestoreV2:tensors:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_2" + } + } + node_def { + name: "AssignVariableOp_2" + op: "AssignVariableOp" + input: "assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + input: "Identity_2:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_2" + } + } + node_def { + name: "Identity_3" + op: "Identity" + input: "RestoreV2:tensors:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_3" + } + } + node_def { + name: "AssignVariableOp_3" + op: "AssignVariableOp" + input: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + input: "Identity_3:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_3" + } + } + node_def { + name: "Identity_4" + op: "Identity" + input: "RestoreV2:tensors:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_4" + } + } + node_def { + name: "AssignVariableOp_4" + op: "AssignVariableOp" + input: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + input: "Identity_4:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + 
experimental_debug_info { + original_node_names: "AssignVariableOp_4" + } + } + node_def { + name: "Identity_5" + op: "Identity" + input: "RestoreV2:tensors:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_5" + } + } + node_def { + name: "AssignVariableOp_5" + op: "AssignVariableOp" + input: "assignvariableop_5_qnetwork_dense_2_kernel" + input: "Identity_5:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_5" + } + } + node_def { + name: "Identity_6" + op: "Identity" + input: "RestoreV2:tensors:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_6" + } + } + node_def { + name: "AssignVariableOp_6" + op: "AssignVariableOp" + input: "assignvariableop_6_qnetwork_dense_2_bias" + input: "Identity_6:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_6" + } + } + node_def { + name: "RestoreV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/tensor_names" + } + } + node_def { + name: "RestoreV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/shape_and_slices" + } + } + node_def { + name: "RestoreV2_1" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2_1/tensor_names:output:0" + input: "RestoreV2_1/shape_and_slices:output:0" + input: "^RestoreV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1" + } + } + node_def { + name: "NoOp" + op: "NoOp" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "NoOp" + } + } + node_def { + name: "Identity_7" + op: "Identity" + input: "file_prefix" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^NoOp" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + 
original_node_names: "Identity_7" + } + } + node_def { + name: "Identity_8" + op: "Identity" + input: "Identity_7:output:0" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^RestoreV2" + input: "^RestoreV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_8" + } + } + ret { + key: "identity_8" + value: "Identity_8:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "AssignVariableOp" + value: "AssignVariableOp" + } + control_ret { + key: "AssignVariableOp_1" + value: "AssignVariableOp_1" + } + control_ret { + key: "AssignVariableOp_2" + value: "AssignVariableOp_2" + } + control_ret { + key: "AssignVariableOp_3" + value: "AssignVariableOp_3" + } + control_ret { + key: "AssignVariableOp_4" + value: "AssignVariableOp_4" + } + control_ret { + key: "AssignVariableOp_5" + value: "AssignVariableOp_5" + } + control_ret { + key: "AssignVariableOp_6" + value: "AssignVariableOp_6" + } + control_ret { + key: "RestoreV2" + value: "RestoreV2" + } + control_ret { + key: "RestoreV2_1" + value: "RestoreV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_<lambda>_728" + input_arg { + name: "readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "ReadVariableOp" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: 
"_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + } + versions { + producer: 357 + min_consumer: 12 + } + } + saver_def { + filename_tensor_name: "saver_filename:0" + save_tensor_name: "StatefulPartitionedCall_2:0" + restore_op_name: "StatefulPartitionedCall_3" + version: V2 + } + collection_def { + key: "saved_model_main_op" + value { + node_list { + value: "NoOp" + } + } + } + signature_def { + key: "__saved_model_init_op" + value { + outputs { + key: "__saved_model_init_op" + value { + name: "NoOp" + tensor_shape { + unknown_rank: true + } + } + } + } + } + signature_def { + key: "action" + value { + inputs { + key: "callee_basic_block_count" + value { + name: "action_callee_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_conditionally_executed_blocks" + value { + name: "action_callee_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_users" + value { + name: "action_callee_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_basic_block_count" + value { + name: "action_caller_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_conditionally_executed_blocks" + value { + name: "action_caller_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_users" + value { + name: "action_caller_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callsite_height" + value { + name: "action_callsite_height:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "cost_estimate" + value { + name: "action_cost_estimate:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "discount" + value { + name: "action_discount:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "edge_count" + value { + name: "action_edge_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "inlining_default" + value { + name: "action_inlining_default:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "node_count" + value { + name: "action_node_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "nr_ctant_params" + value { + name: "action_nr_ctant_params:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "reward" + value { + name: "action_reward:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "step_type" + value { + name: "action_step_type:0" + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + } + } + outputs { + key: "inlining_decision" + value { + name: "StatefulPartitionedCall:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_initial_state" + value { + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_train_step" + value { + outputs { + key: "int64" + value { + name: "StatefulPartitionedCall_1:0" + dtype: DT_INT64 + tensor_shape { + } + } + } + 
method_name: "tensorflow/serving/predict" + } + } + object_graph_def { + nodes { + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 2 + local_name: "_trajectory_spec" + } + children { + node_id: 3 + local_name: "_wrapped_policy" + } + children { + node_id: 4 + local_name: "train_step" + } + children { + node_id: 5 + local_name: "model_variables" + } + children { + node_id: 6 + local_name: "signatures" + } + children { + node_id: 210 + local_name: "action" + } + children { + node_id: 211 + local_name: "get_initial_state" + } + children { + node_id: 212 + local_name: "get_train_step" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "3" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 8 + local_name: "_q_network" + } + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 9 + local_name: "_trajectory_spec" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_INT64 + shape { + } + name: "train_step" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 213 + local_name: "action" + } + children { + node_id: 214 + local_name: "get_initial_state" + } + children { + node_id: 215 + local_name: "get_train_step" + } + user_object { + identifier: "signature_map" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 16 + local_name: "_input_tensor_spec" + } + children { + node_id: 17 + local_name: "_encoder" + } + children { + node_id: 18 + local_name: "_q_value_layer" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 22 + local_name: "keras_api" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"QNetwork\", \"name\": \"QNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + 
producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/bias" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 23 + local_name: "_input_tensor_spec" + } + children { + node_id: 24 + local_name: "_preprocessing_nest" + } + children { + node_id: 25 + local_name: "_flat_preprocessing_layers" + } + children { + node_id: 26 + local_name: "_preprocessing_combiner" + } + children { + node_id: 27 + local_name: "_postprocessing_layers" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 31 + local_name: "keras_api" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"EncodingNetwork\", \"name\": \"EncodingNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 14 + local_name: "kernel" + } + children { + node_id: 15 + local_name: "bias" + } + children { + node_id: 32 + local_name: "variables" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 35 + local_name: "keras_api" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_2\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_2\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 2, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"RandomUniform\", \"config\": {\"minval\": -0.03, \"maxval\": 0.03, \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Constant\", \"config\": {\"value\": -0.2, \"dtype\": \"float32\"}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, 
\"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 40}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 40]}}" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 36 + local_name: "layer_metrics" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 37 + local_name: "layer_regularization_losses" + } + children { + node_id: 38 + local_name: "metrics" + } + children { + node_id: 39 + local_name: "layers" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 40 + local_name: "non_trainable_variables" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 217 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 54 + local_name: "regularization_losses" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 56 + local_name: "keras_api" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 
+ min_consumer: 1 + } + metadata: "{\"class_name\": \"Concatenate\", \"name\": \"concatenate\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"concatenate\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}, \"build_input_shape\": [{\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 1]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}]}" + } + } + nodes { + children { + node_id: 57 + local_name: "0" + } + children { + node_id: 58 + local_name: "1" + } + children { + node_id: 59 + local_name: "2" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 60 + local_name: "layer_metrics" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 61 + local_name: "layer_regularization_losses" + } + children { + node_id: 62 + local_name: "metrics" + } + children { + node_id: 63 + local_name: "layers" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 64 + local_name: "non_trainable_variables" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 219 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 65 + local_name: "layer_metrics" + } + children { + node_id: 32 + local_name: 
"variables" + } + children { + node_id: 66 + local_name: "layer_regularization_losses" + } + children { + node_id: 67 + local_name: "metrics" + } + children { + node_id: 68 + local_name: "layers" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 69 + local_name: "non_trainable_variables" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 221 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 17 + local_name: "0" + } + children { + node_id: 18 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 73 + local_name: "keras_api" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 
3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 12.0, 13.0, 14.0, 14.0, 14.0, 16.0, 17.0, 19.0, 23.0, 27.0, 39.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 77 + local_name: "keras_api" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_1\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_1\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 10.0, 10.0, 10.0, 12.0, 12.0, 12.0, 14.0, 14.0, 18.0, 20.0, 23.0, 30.0, 41.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 81 + local_name: "keras_api" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_2\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_2\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 
29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 31.0, 31.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 44.0, 45.0, 45.0, 46.0, 46.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 51.0, 52.0, 53.0, 53.0, 54.0, 55.0, 56.0, 57.0, 57.0, 58.0, 59.0, 60.0, 61.0, 61.0, 63.0, 63.0, 64.0, 65.0, 66.0, 67.0, 67.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 85.0, 86.0, 88.0, 89.0, 91.0, 92.0, 94.0, 96.0, 97.0, 99.0, 100.0, 101.0, 103.0, 105.0, 107.0, 109.0, 111.0, 113.0, 115.0, 118.0, 121.0, 123.0, 126.0, 128.0, 130.0, 133.0, 135.0, 137.0, 140.0, 143.0, 146.0, 148.0, 151.0, 154.0, 157.0, 161.0, 163.0, 166.0, 169.0, 173.0, 178.0, 183.0, 189.0, 193.0, 197.0, 202.0, 208.0, 213.0, 218.0, 223.0, 228.0, 233.0, 239.0, 245.0, 250.0, 257.0, 262.0, 269.0, 277.0, 284.0, 292.0, 300.0, 308.0, 319.0, 329.0, 340.0, 349.0, 359.0, 371.0, 382.0, 394.0, 410.0, 423.0, 435.0, 445.0, 462.0, 480.0, 492.0, 506.0, 519.0, 536.0, 557.0, 577.0, 598.0, 622.0, 655.0, 679.0, 707.0, 733.0, 751.0, 787.0, 814.0, 847.0, 897.0, 934.0, 997.0, 1062.0, 1111.0, 1181.0, 1275.0, 1385.0, 1465.0, 1603.0, 1769.0, 2057.0, 2257.0, 2803.0, 3468.0, 4417.0, 6538.0, 16126.0, 23446.0, 33536.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 85 + local_name: "keras_api" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_3\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_3\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 
11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 31.0, 32.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 35.0, 36.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 45.0, 45.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 52.0, 52.0, 53.0, 54.0, 55.0, 55.0, 57.0, 58.0, 59.0, 60.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 70.0, 70.0, 70.0, 70.0, 71.0, 73.0, 75.0, 76.0, 78.0, 81.0, 84.0, 86.0, 90.0, 94.0, 98.0, 101.0, 106.0, 111.0, 117.0, 123.0, 130.0, 138.0, 146.0, 157.0, 163.0, 176.0, 187.0, 198.0, 214.0, 227.0, 252.0, 280.0, 327.0, 395.0, 506.0, 671.0, 1025.0, 1971.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 89 + local_name: "keras_api" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_4\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_4\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 
6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 27.0, 28.0, 28.0, 28.0, 28.0, 28.0, 29.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 31.0, 32.0, 32.0, 32.0, 32.0, 32.0, 34.0, 34.0, 34.0, 34.0, 34.0, 34.0, 35.0, 36.0, 36.0, 36.0, 37.0, 38.0, 38.0, 38.0, 39.0, 40.0, 40.0, 41.0, 42.0, 42.0, 43.0, 44.0, 44.0, 46.0, 46.0, 47.0, 48.0, 48.0, 50.0, 50.0, 52.0, 52.0, 54.0, 55.0, 55.0, 56.0, 57.0, 58.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 62.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 72.0, 74.0, 77.0, 80.0, 82.0, 86.0, 89.0, 92.0, 96.0, 99.0, 104.0, 108.0, 114.0, 119.0, 125.0, 131.0, 139.0, 146.0, 157.0, 167.0, 176.0, 188.0, 198.0, 215.0, 236.0, 262.0, 306.0, 376.0, 462.0, 596.0, 942.0, 1428.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 93 + local_name: "keras_api" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_5\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_5\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": 
{\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 18.0, 20.0, 23.0, 29.0, 38.0, 60.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 96 + local_name: "trainable_variables" + } + children { + node_id: 97 + local_name: "keras_api" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_6\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_6\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 34.0, 35.0, 37.0, 38.0, 40.0, 46.0, 51.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 101 + local_name: "keras_api" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_7\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_7\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", 
null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [-15035.0, -15030.0, -15025.0, -15000.0, -14985.0, -14945.0, -14745.0, -70.0, -55.0, -55.0, -50.0, -50.0, -50.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, 
-30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 55.0, 55.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 75.0, 75.0, 80.0, 80.0, 80.0, 85.0, 85.0, 85.0, 90.0, 90.0, 90.0, 90.0, 95.0, 95.0, 100.0, 100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 125.0, 130.0, 140.0, 140.0, 145.0, 150.0, 155.0, 160.0, 160.0, 165.0, 170.0, 175.0, 180.0, 190.0, 200.0, 210.0, 215.0, 220.0, 220.0, 230.0, 235.0, 245.0, 250.0, 260.0, 275.0, 290.0, 305.0, 325.0, 350.0, 370.0, 390.0, 425.0, 460.0, 500.0, 560.0, 650.0, 790.0, 1025.0, 1600.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 102 + local_name: "variables" + } + children { + 
node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 105 + local_name: "keras_api" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_8\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_8\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [18.0, 29.0, 39.0, 48.0, 57.0, 64.0, 70.0, 76.0, 82.0, 87.0, 92.0, 97.0, 101.0, 105.0, 109.0, 113.0, 116.0, 120.0, 123.0, 127.0, 130.0, 134.0, 137.0, 140.0, 143.0, 146.0, 149.0, 152.0, 156.0, 159.0, 162.0, 165.0, 168.0, 171.0, 174.0, 177.0, 180.0, 183.0, 186.0, 188.0, 191.0, 194.0, 197.0, 200.0, 203.0, 205.0, 208.0, 211.0, 214.0, 217.0, 219.0, 222.0, 225.0, 228.0, 231.0, 233.0, 236.0, 239.0, 242.0, 244.0, 247.0, 250.0, 253.0, 255.0, 258.0, 261.0, 264.0, 266.0, 269.0, 272.0, 275.0, 278.0, 280.0, 283.0, 286.0, 289.0, 292.0, 294.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 314.0, 317.0, 319.0, 322.0, 325.0, 327.0, 330.0, 333.0, 336.0, 339.0, 341.0, 344.0, 347.0, 350.0, 353.0, 355.0, 358.0, 361.0, 364.0, 367.0, 370.0, 373.0, 375.0, 378.0, 381.0, 384.0, 387.0, 390.0, 393.0, 396.0, 399.0, 401.0, 404.0, 407.0, 410.0, 413.0, 416.0, 419.0, 422.0, 425.0, 428.0, 431.0, 434.0, 437.0, 440.0, 443.0, 446.0, 449.0, 452.0, 455.0, 458.0, 461.0, 464.0, 467.0, 470.0, 473.0, 476.0, 479.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 501.0, 504.0, 507.0, 511.0, 514.0, 517.0, 520.0, 523.0, 526.0, 530.0, 533.0, 536.0, 539.0, 542.0, 545.0, 549.0, 552.0, 555.0, 558.0, 562.0, 565.0, 569.0, 572.0, 575.0, 579.0, 582.0, 585.0, 589.0, 592.0, 595.0, 599.0, 602.0, 605.0, 609.0, 612.0, 616.0, 620.0, 623.0, 626.0, 630.0, 634.0, 637.0, 641.0, 644.0, 648.0, 651.0, 655.0, 658.0, 662.0, 665.0, 669.0, 672.0, 676.0, 680.0, 683.0, 687.0, 691.0, 694.0, 698.0, 702.0, 705.0, 709.0, 712.0, 716.0, 720.0, 724.0, 727.0, 731.0, 735.0, 739.0, 742.0, 746.0, 750.0, 754.0, 758.0, 761.0, 765.0, 769.0, 773.0, 777.0, 780.0, 784.0, 788.0, 792.0, 796.0, 800.0, 804.0, 808.0, 812.0, 816.0, 820.0, 823.0, 828.0, 832.0, 836.0, 840.0, 844.0, 848.0, 852.0, 856.0, 860.0, 864.0, 868.0, 873.0, 877.0, 881.0, 885.0, 889.0, 893.0, 897.0, 902.0, 906.0, 910.0, 914.0, 919.0, 923.0, 927.0, 931.0, 935.0, 940.0, 944.0, 948.0, 953.0, 957.0, 
962.0, 966.0, 970.0, 975.0, 979.0, 984.0, 988.0, 993.0, 997.0, 1002.0, 1006.0, 1011.0, 1015.0, 1020.0, 1024.0, 1029.0, 1034.0, 1038.0, 1043.0, 1047.0, 1052.0, 1057.0, 1062.0, 1066.0, 1071.0, 1076.0, 1081.0, 1086.0, 1090.0, 1095.0, 1100.0, 1105.0, 1110.0, 1114.0, 1119.0, 1124.0, 1129.0, 1134.0, 1139.0, 1144.0, 1149.0, 1154.0, 1159.0, 1164.0, 1169.0, 1174.0, 1179.0, 1184.0, 1189.0, 1194.0, 1199.0, 1204.0, 1209.0, 1215.0, 1220.0, 1225.0, 1230.0, 1235.0, 1241.0, 1246.0, 1251.0, 1257.0, 1262.0, 1267.0, 1273.0, 1278.0, 1284.0, 1289.0, 1294.0, 1300.0, 1305.0, 1311.0, 1316.0, 1322.0, 1327.0, 1333.0, 1338.0, 1344.0, 1350.0, 1355.0, 1361.0, 1367.0, 1372.0, 1378.0, 1383.0, 1389.0, 1395.0, 1401.0, 1407.0, 1413.0, 1418.0, 1424.0, 1430.0, 1436.0, 1442.0, 1448.0, 1454.0, 1459.0, 1465.0, 1472.0, 1477.0, 1483.0, 1489.0, 1495.0, 1501.0, 1507.0, 1514.0, 1520.0, 1526.0, 1532.0, 1538.0, 1545.0, 1551.0, 1557.0, 1564.0, 1570.0, 1576.0, 1583.0, 1589.0, 1596.0, 1602.0, 1608.0, 1615.0, 1621.0, 1628.0, 1634.0, 1641.0, 1647.0, 1654.0, 1661.0, 1667.0, 1674.0, 1681.0, 1687.0, 1694.0, 1701.0, 1708.0, 1715.0, 1722.0, 1729.0, 1735.0, 1742.0, 1749.0, 1756.0, 1763.0, 1770.0, 1777.0, 1784.0, 1791.0, 1798.0, 1806.0, 1812.0, 1820.0, 1827.0, 1835.0, 1841.0, 1849.0, 1856.0, 1863.0, 1871.0, 1878.0, 1885.0, 1893.0, 1901.0, 1908.0, 1915.0, 1923.0, 1930.0, 1938.0, 1946.0, 1953.0, 1961.0, 1969.0, 1976.0, 1984.0, 1992.0, 2000.0, 2007.0, 2015.0, 2023.0, 2031.0, 2039.0, 2047.0, 2055.0, 2063.0, 2071.0, 2079.0, 2087.0, 2095.0, 2104.0, 2112.0, 2120.0, 2128.0, 2137.0, 2146.0, 2154.0, 2162.0, 2171.0, 2179.0, 2188.0, 2197.0, 2205.0, 2214.0, 2223.0, 2232.0, 2241.0, 2250.0, 2258.0, 2268.0, 2277.0, 2285.0, 2294.0, 2304.0, 2313.0, 2322.0, 2331.0, 2340.0, 2350.0, 2359.0, 2368.0, 2378.0, 2388.0, 2397.0, 2407.0, 2416.0, 2426.0, 2436.0, 2446.0, 2455.0, 2465.0, 2475.0, 2485.0, 2495.0, 2505.0, 2515.0, 2525.0, 2535.0, 2545.0, 2556.0, 2566.0, 2577.0, 2587.0, 2598.0, 2609.0, 2620.0, 2631.0, 2641.0, 2652.0, 2663.0, 2674.0, 2685.0, 2696.0, 2708.0, 2719.0, 2730.0, 2742.0, 2753.0, 2764.0, 2776.0, 2788.0, 2799.0, 2811.0, 2823.0, 2835.0, 2847.0, 2858.0, 2870.0, 2882.0, 2894.0, 2906.0, 2919.0, 2931.0, 2943.0, 2956.0, 2968.0, 2981.0, 2994.0, 3006.0, 3019.0, 3032.0, 3045.0, 3058.0, 3070.0, 3083.0, 3096.0, 3109.0, 3121.0, 3134.0, 3148.0, 3161.0, 3174.0, 3187.0, 3200.0, 3214.0, 3228.0, 3242.0, 3255.0, 3268.0, 3283.0, 3297.0, 3310.0, 3325.0, 3340.0, 3353.0, 3368.0, 3383.0, 3398.0, 3412.0, 3427.0, 3442.0, 3457.0, 3471.0, 3487.0, 3502.0, 3516.0, 3531.0, 3546.0, 3561.0, 3577.0, 3593.0, 3608.0, 3625.0, 3641.0, 3657.0, 3673.0, 3690.0, 3706.0, 3722.0, 3738.0, 3755.0, 3772.0, 3789.0, 3805.0, 3823.0, 3839.0, 3856.0, 3873.0, 3891.0, 3908.0, 3926.0, 3944.0, 3960.0, 3977.0, 3995.0, 4013.0, 4031.0, 4048.0, 4067.0, 4085.0, 4104.0, 4122.0, 4140.0, 4159.0, 4177.0, 4196.0, 4215.0, 4234.0, 4253.0, 4272.0, 4291.0, 4311.0, 4332.0, 4351.0, 4371.0, 4391.0, 4412.0, 4433.0, 4454.0, 4474.0, 4496.0, 4518.0, 4538.0, 4558.0, 4579.0, 4601.0, 4619.0, 4640.0, 4662.0, 4684.0, 4706.0, 4728.0, 4751.0, 4771.0, 4794.0, 4818.0, 4840.0, 4863.0, 4887.0, 4910.0, 4933.0, 4956.0, 4980.0, 5004.0, 5028.0, 5052.0, 5076.0, 5100.0, 5125.0, 5152.0, 5175.0, 5200.0, 5226.0, 5251.0, 5278.0, 5304.0, 5329.0, 5354.0, 5381.0, 5407.0, 5433.0, 5460.0, 5488.0, 5516.0, 5544.0, 5573.0, 5600.0, 5628.0, 5656.0, 5684.0, 5713.0, 5741.0, 5771.0, 5799.0, 5830.0, 5860.0, 5891.0, 5921.0, 5951.0, 5980.0, 6010.0, 6041.0, 6073.0, 6105.0, 6133.0, 6163.0, 6195.0, 6227.0, 6258.0, 6291.0, 6322.0, 6356.0, 6390.0, 6424.0, 6457.0, 6491.0, 
6527.0, 6561.0, 6596.0, 6631.0, 6665.0, 6701.0, 6736.0, 6771.0, 6805.0, 6840.0, 6877.0, 6911.0, 6947.0, 6985.0, 7022.0, 7059.0, 7097.0, 7135.0, 7174.0, 7212.0, 7251.0, 7289.0, 7327.0, 7366.0, 7406.0, 7447.0, 7486.0, 7525.0, 7566.0, 7606.0, 7646.0, 7688.0, 7728.0, 7771.0, 7814.0, 7859.0, 7901.0, 7949.0, 7992.0, 8036.0, 8082.0, 8127.0, 8173.0, 8218.0, 8262.0, 8309.0, 8353.0, 8397.0, 8444.0, 8489.0, 8539.0, 8585.0, 8632.0, 8682.0, 8727.0, 8777.0, 8828.0, 8879.0, 8929.0, 8982.0, 9037.0, 9087.0, 9140.0, 9193.0, 9250.0, 9305.0, 9361.0, 9418.0, 9475.0, 9532.0, 9589.0, 9644.0, 9699.0, 9758.0, 9818.0, 9875.0, 9935.0, 9997.0, 10057.0, 10117.0, 10174.0, 10232.0, 10296.0, 10356.0, 10419.0, 10482.0, 10546.0, 10608.0, 10670.0, 10729.0, 10790.0, 10855.0, 10920.0, 10990.0, 11054.0, 11118.0, 11181.0, 11248.0, 11316.0, 11385.0, 11454.0, 11526.0, 11597.0, 11667.0, 11740.0, 11820.0, 11897.0, 11973.0, 12046.0, 12126.0, 12204.0, 12287.0, 12370.0, 12456.0, 12538.0, 12627.0, 12714.0, 12799.0, 12883.0, 12971.0, 13062.0, 13154.0, 13233.0, 13328.0, 13418.0, 13511.0, 13607.0, 13709.0, 13806.0, 13903.0, 14002.0, 14104.0, 14200.0, 14288.0, 14391.0, 14488.0, 14590.0, 14698.0, 14808.0, 14910.0, 15020.0, 15126.0, 15238.0, 15347.0, 15456.0, 15574.0, 15692.0, 15786.0, 15896.0, 16016.0, 16136.0, 16250.0, 16352.0, 16474.0, 16575.0, 16702.0, 16835.0, 16965.0, 17096.0, 17232.0, 17370.0, 17443.0, 17581.0, 17719.0, 17864.0, 17976.0, 18116.0, 18250.0, 18396.0, 18540.0, 18690.0, 18840.0, 18989.0, 19136.0, 19294.0, 19445.0, 19589.0, 19750.0, 19905.0, 20064.0, 20191.0, 20325.0, 20497.0, 20662.0, 20833.0, 20981.0, 21152.0, 21334.0, 21510.0, 21642.0, 21821.0, 22001.0, 22186.0, 22379.0, 22568.0, 22770.0, 22958.0, 23162.0, 23360.0, 23524.0, 23737.0, 23960.0, 24175.0, 24395.0, 24631.0, 24865.0, 25091.0, 25327.0, 25580.0, 25833.0, 26089.0, 26361.0, 26636.0, 26889.0, 27155.0, 27436.0, 27715.0, 28003.0, 28303.0, 28600.0, 28916.0, 29223.0, 29553.0, 29884.0, 30200.0, 30538.0, 30868.0, 31211.0, 31548.0, 31881.0, 32253.0, 32605.0, 32980.0, 33385.0, 33805.0, 34254.0, 34723.0, 35167.0, 35666.0, 36125.0, 36652.0, 37177.0, 37739.0, 38321.0, 38932.0, 39640.0, 40337.0, 41000.0, 41626.0, 42385.0, 43122.0, 43890.0, 44687.0, 45609.0, 46520.0, 47489.0, 48432.0, 49458.0, 50511.0, 51561.0, 52568.0, 53676.0, 54936.0, 56071.0, 57302.0, 58513.0, 59800.0, 61192.0, 62702.0, 64205.0, 65868.0, 67780.0, 69960.0, 72330.0, 74918.0, 77540.0, 80344.0, 83727.0, 87662.0, 93589.0, 101441.0, 110544.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 109 + local_name: "keras_api" + } + children { + node_id: 242 + local_name: "__call__" + } + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_9\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_9\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAgAAAAQAAAATAAAAcxgAAACIAHwAgwF9AXQAagF8AXQAagJkAY0CUwApAk4pAdoF\\nZHR5cGUpA9oCdGbaCnplcm9zX2xpa2XaB2Zsb2F0MzIpAtoDb2Jz2gxleHBhbmRlZF9vYnMpAdoO\\nZXhwYW5kX2RpbXNfb3CpAPr0L2V4cG9ydC9oZGEzL2JvcmdsZXQvbG9jYWxfcmFtX2ZzX2RpcnMv\\nMC55dW5kaV9tdXBwZXRfMF8xMjI3MDgzMy4xMy55dW5kaS4xOTQ3MzE0MTc5NjEuOGY0ZjlmOThj\\nYjdhMzA1NS9idWlsZF90YXJnZXRfdHJhaW5fcGFyX2Q5NzU3NTM3MDE2YTJlYjgvdHJhaW4ucGFy\\nL2dvb2dsZTMvbGVhcm5pbmcvc21hcnRjaG9pY2VzL3Jlc2VhcmNoL2NsaWVudHMvY29tcGlsZXJf\\nb3B0L3BvbGljeV90cmFpbmluZy9mZWF0dXJlX29wcy5wedoPZGlzY2FyZF9mZWF0dXJlJwAAAHME\\nAAAAAAEIAQ==\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 113 + local_name: "keras_api" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_10\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_10\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [13.0, 38.0, 56.0, 70.0, 82.0, 94.0, 104.0, 114.0, 123.0, 131.0, 139.0, 148.0, 152.0, 153.0, 158.0, 163.0, 170.0, 174.0, 178.0, 180.0, 183.0, 186.0, 188.0, 190.0, 192.0, 196.0, 198.0, 201.0, 205.0, 208.0, 212.0, 215.0, 219.0, 221.0, 225.0, 227.0, 229.0, 232.0, 233.0, 236.0, 239.0, 242.0, 245.0, 248.0, 250.0, 252.0, 254.0, 256.0, 259.0, 261.0, 264.0, 267.0, 270.0, 272.0, 275.0, 278.0, 280.0, 283.0, 285.0, 287.0, 290.0, 293.0, 295.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 313.0, 316.0, 319.0, 322.0, 325.0, 329.0, 331.0, 333.0, 336.0, 338.0, 340.0, 343.0, 345.0, 347.0, 347.0, 349.0, 351.0, 353.0, 355.0, 357.0, 359.0, 361.0, 363.0, 365.0, 368.0, 369.0, 371.0, 373.0, 375.0, 377.0, 380.0, 382.0, 385.0, 387.0, 389.0, 391.0, 394.0, 396.0, 398.0, 400.0, 403.0, 405.0, 408.0, 410.0, 412.0, 
415.0, 417.0, 420.0, 422.0, 425.0, 427.0, 429.0, 432.0, 434.0, 437.0, 439.0, 442.0, 444.0, 446.0, 449.0, 451.0, 454.0, 456.0, 458.0, 461.0, 463.0, 466.0, 469.0, 472.0, 474.0, 476.0, 479.0, 482.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 500.0, 503.0, 505.0, 508.0, 510.0, 513.0, 516.0, 519.0, 522.0, 524.0, 528.0, 530.0, 533.0, 536.0, 539.0, 541.0, 544.0, 547.0, 550.0, 553.0, 556.0, 559.0, 561.0, 563.0, 567.0, 570.0, 572.0, 575.0, 577.0, 580.0, 584.0, 586.0, 589.0, 592.0, 595.0, 598.0, 601.0, 605.0, 607.0, 611.0, 613.0, 617.0, 620.0, 623.0, 626.0, 629.0, 632.0, 635.0, 639.0, 642.0, 645.0, 648.0, 651.0, 654.0, 657.0, 660.0, 662.0, 666.0, 669.0, 672.0, 676.0, 679.0, 682.0, 685.0, 688.0, 690.0, 693.0, 696.0, 699.0, 702.0, 705.0, 709.0, 712.0, 714.0, 718.0, 721.0, 724.0, 726.0, 728.0, 729.0, 731.0, 734.0, 737.0, 741.0, 745.0, 748.0, 750.0, 753.0, 756.0, 760.0, 763.0, 766.0, 770.0, 773.0, 776.0, 779.0, 782.0, 786.0, 788.0, 793.0, 796.0, 798.0, 802.0, 805.0, 808.0, 811.0, 815.0, 818.0, 820.0, 824.0, 827.0, 829.0, 832.0, 835.0, 838.0, 842.0, 846.0, 849.0, 854.0, 857.0, 860.0, 864.0, 867.0, 871.0, 875.0, 879.0, 882.0, 887.0, 890.0, 893.0, 897.0, 901.0, 905.0, 908.0, 911.0, 915.0, 918.0, 921.0, 925.0, 929.0, 932.0, 934.0, 937.0, 940.0, 943.0, 946.0, 950.0, 953.0, 956.0, 961.0, 965.0, 969.0, 973.0, 976.0, 980.0, 982.0, 985.0, 990.0, 994.0, 997.0, 1001.0, 1005.0, 1007.0, 1010.0, 1014.0, 1018.0, 1022.0, 1025.0, 1028.0, 1033.0, 1035.0, 1038.0, 1042.0, 1047.0, 1052.0, 1056.0, 1060.0, 1063.0, 1067.0, 1071.0, 1075.0, 1079.0, 1083.0, 1086.0, 1088.0, 1092.0, 1097.0, 1102.0, 1106.0, 1109.0, 1113.0, 1117.0, 1120.0, 1125.0, 1129.0, 1134.0, 1137.0, 1142.0, 1146.0, 1150.0, 1151.0, 1155.0, 1159.0, 1162.0, 1166.0, 1170.0, 1174.0, 1177.0, 1181.0, 1185.0, 1188.0, 1193.0, 1196.0, 1203.0, 1207.0, 1212.0, 1214.0, 1217.0, 1220.0, 1222.0, 1222.0, 1226.0, 1229.0, 1233.0, 1237.0, 1241.0, 1246.0, 1250.0, 1253.0, 1257.0, 1262.0, 1267.0, 1272.0, 1278.0, 1283.0, 1287.0, 1293.0, 1297.0, 1301.0, 1304.0, 1309.0, 1315.0, 1320.0, 1325.0, 1329.0, 1333.0, 1336.0, 1341.0, 1344.0, 1348.0, 1351.0, 1357.0, 1363.0, 1368.0, 1374.0, 1379.0, 1383.0, 1386.0, 1391.0, 1395.0, 1399.0, 1403.0, 1407.0, 1410.0, 1415.0, 1418.0, 1423.0, 1428.0, 1432.0, 1436.0, 1438.0, 1442.0, 1446.0, 1450.0, 1454.0, 1462.0, 1467.0, 1472.0, 1477.0, 1483.0, 1488.0, 1492.0, 1496.0, 1503.0, 1508.0, 1513.0, 1518.0, 1520.0, 1526.0, 1531.0, 1534.0, 1538.0, 1542.0, 1546.0, 1552.0, 1558.0, 1564.0, 1568.0, 1573.0, 1578.0, 1581.0, 1590.0, 1596.0, 1601.0, 1606.0, 1611.0, 1616.0, 1622.0, 1629.0, 1634.0, 1640.0, 1647.0, 1651.0, 1657.0, 1660.0, 1665.0, 1672.0, 1678.0, 1686.0, 1692.0, 1698.0, 1704.0, 1709.0, 1714.0, 1719.0, 1724.0, 1730.0, 1737.0, 1744.0, 1751.0, 1755.0, 1761.0, 1764.0, 1772.0, 1778.0, 1784.0, 1789.0, 1799.0, 1804.0, 1811.0, 1819.0, 1825.0, 1830.0, 1838.0, 1849.0, 1858.0, 1862.0, 1868.0, 1872.0, 1878.0, 1885.0, 1888.0, 1892.0, 1897.0, 1902.0, 1907.0, 1919.0, 1926.0, 1932.0, 1936.0, 1941.0, 1946.0, 1952.0, 1960.0, 1968.0, 1977.0, 1985.0, 1992.0, 1997.0, 2006.0, 2012.0, 2018.0, 2026.0, 2034.0, 2044.0, 2050.0, 2057.0, 2064.0, 2069.0, 2075.0, 2082.0, 2091.0, 2098.0, 2107.0, 2122.0, 2126.0, 2135.0, 2146.0, 2149.0, 2157.0, 2163.0, 2172.0, 2178.0, 2184.0, 2191.0, 2198.0, 2208.0, 2216.0, 2223.0, 2235.0, 2242.0, 2252.0, 2263.0, 2272.0, 2277.0, 2288.0, 2296.0, 2306.0, 2311.0, 2318.0, 2323.0, 2334.0, 2341.0, 2356.0, 2366.0, 2373.0, 2379.0, 2386.0, 2407.0, 2416.0, 2423.0, 2432.0, 2438.0, 2448.0, 2453.0, 2464.0, 2473.0, 2473.0, 2481.0, 2492.0, 2504.0, 2511.0, 2523.0, 2529.0, 
2537.0, 2545.0, 2556.0, 2566.0, 2575.0, 2584.0, 2592.0, 2602.0, 2613.0, 2624.0, 2636.0, 2643.0, 2647.0, 2652.0, 2664.0, 2675.0, 2688.0, 2693.0, 2702.0, 2709.0, 2722.0, 2739.0, 2754.0, 2766.0, 2776.0, 2786.0, 2799.0, 2810.0, 2832.0, 2840.0, 2849.0, 2860.0, 2873.0, 2889.0, 2908.0, 2914.0, 2926.0, 2939.0, 2950.0, 2961.0, 2969.0, 2978.0, 2990.0, 2999.0, 3023.0, 3032.0, 3049.0, 3066.0, 3085.0, 3101.0, 3107.0, 3117.0, 3129.0, 3144.0, 3167.0, 3190.0, 3212.0, 3229.0, 3238.0, 3264.0, 3293.0, 3302.0, 3309.0, 3314.0, 3323.0, 3344.0, 3352.0, 3362.0, 3390.0, 3400.0, 3411.0, 3435.0, 3456.0, 3470.0, 3485.0, 3498.0, 3505.0, 3519.0, 3539.0, 3545.0, 3545.0, 3560.0, 3576.0, 3597.0, 3607.0, 3621.0, 3641.0, 3665.0, 3679.0, 3701.0, 3714.0, 3733.0, 3741.0, 3745.0, 3757.0, 3773.0, 3787.0, 3795.0, 3805.0, 3822.0, 3835.0, 3844.0, 3861.0, 3872.0, 3878.0, 3897.0, 3919.0, 3941.0, 3971.0, 4004.0, 4014.0, 4019.0, 4061.0, 4068.0, 4089.0, 4108.0, 4117.0, 4125.0, 4146.0, 4165.0, 4194.0, 4204.0, 4224.0, 4236.0, 4263.0, 4290.0, 4301.0, 4319.0, 4326.0, 4347.0, 4369.0, 4386.0, 4413.0, 4435.0, 4451.0, 4451.0, 4451.0, 4476.0, 4500.0, 4539.0, 4579.0, 4592.0, 4600.0, 4622.0, 4650.0, 4683.0, 4714.0, 4742.0, 4755.0, 4771.0, 4788.0, 4816.0, 4828.0, 4831.0, 4831.0, 4831.0, 4843.0, 4852.0, 4865.0, 4896.0, 4915.0, 4931.0, 4952.0, 4965.0, 4983.0, 5007.0, 5043.0, 5061.0, 5081.0, 5095.0, 5122.0, 5143.0, 5171.0, 5204.0, 5226.0, 5233.0, 5250.0, 5281.0, 5320.0, 5323.0, 5328.0, 5345.0, 5374.0, 5413.0, 5466.0, 5492.0, 5524.0, 5555.0, 5567.0, 5610.0, 5676.0, 5701.0, 5716.0, 5744.0, 5768.0, 5795.0, 5818.0, 5854.0, 5906.0, 5934.0, 5960.0, 5975.0, 5993.0, 6025.0, 6034.0, 6051.0, 6082.0, 6106.0, 6125.0, 6159.0, 6187.0, 6242.0, 6287.0, 6311.0, 6332.0, 6348.0, 6358.0, 6368.0, 6377.0, 6402.0, 6407.0, 6428.0, 6450.0, 6475.0, 6498.0, 6505.0, 6533.0, 6565.0, 6580.0, 6595.0, 6611.0, 6654.0, 6658.0, 6705.0, 6751.0, 6786.0, 6828.0, 6876.0, 6896.0, 6948.0, 6964.0, 7065.0, 7082.0, 7118.0, 7184.0, 7214.0, 7271.0, 7310.0, 7357.0, 7405.0, 7506.0, 7613.0, 7641.0, 7675.0, 7720.0, 7781.0, 7833.0, 7860.0, 7898.0, 7929.0, 8044.0, 8104.0, 8148.0, 8236.0, 8273.0, 8313.0, 8349.0, 8381.0, 8409.0, 8498.0, 8507.0, 8524.0, 8570.0, 8607.0, 8630.0, 8637.0, 8675.0, 8700.0, 8714.0, 8734.0, 8776.0, 8836.0, 8854.0, 8867.0, 8868.0, 9065.0, 9113.0, 9121.0, 9241.0, 9357.0, 9360.0, 9585.0, 9613.0, 9684.0, 9727.0, 9751.0, 9777.0, 9802.0, 9889.0, 9903.0, 9914.0, 9978.0, 10061.0, 10192.0, 10213.0, 10345.0, 10369.0, 10404.0, 10430.0, 10471.0, 10481.0, 10489.0, 10492.0, 10494.0, 10524.0, 10554.0, 10557.0, 10560.0, 10562.0, 10641.0, 10716.0, 10842.0, 10897.0, 10967.0, 11053.0, 11128.0, 11137.0, 11328.0, 11336.0, 11401.0, 11532.0, 11573.0, 11860.0, 11880.0, 12013.0, 12305.0, 12358.0, 12386.0, 12404.0, 12456.0, 12456.0, 12476.0, 12615.0, 12677.0, 12981.0, 13094.0, 13197.0, 13708.0, 13717.0, 13788.0, 14049.0, 14112.0, 14224.0, 14257.0, 14681.0, 14901.0, 15006.0, 15071.0, 15100.0, 15248.0, 15669.0, 15877.0, 15953.0, 15953.0, 16066.0, 16072.0, 16271.0, 16292.0, 16386.0, 16490.0, 16633.0, 16670.0, 16834.0, 16896.0, 17543.0, 17693.0, 17800.0, 17859.0, 18397.0, 18811.0, 18826.0, 18971.0, 19304.0, 19319.0, 19695.0, 20378.0, 20865.0, 21313.0, 21330.0, 22321.0, 22760.0, 22770.0, 23783.0, 23785.0, 24525.0, 24844.0, 24848.0, 24964.0, 24966.0, 27468.0, 27478.0, 27555.0, 27555.0, 28215.0, 28219.0, 28336.0, 28490.0, 30213.0, 30228.0, 30242.0, 34116.0, 43518.0, 43518.0, 43518.0, 43852.0, 43852.0, 43852.0]]}]}, \"function_type\": \"lambda\", \"module\": 
\"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 115 + local_name: "regularization_losses" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 117 + local_name: "keras_api" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_11\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_11\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", 
\"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 118 + local_name: "layer_metrics" + } + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 119 + local_name: "layer_regularization_losses" + } + children { + node_id: 120 + local_name: "metrics" + } + children { + node_id: 121 + local_name: "layers" + } + children { + node_id: 54 + local_name: "regularization_losses" + } + children { + node_id: 122 + local_name: "non_trainable_variables" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 223 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 125 + local_name: "trainable_variables" + } + children { + node_id: 126 + local_name: "keras_api" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Flatten\", \"name\": \"flatten\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"flatten\", \"trainable\": true, \"dtype\": \"float32\", \"data_format\": \"channels_last\"}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 1, \"axes\": {}}}}" + } + } + nodes { + children { + node_id: 10 + local_name: "kernel" + } + children { + node_id: 11 + local_name: "bias" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 130 + local_name: "keras_api" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": 
null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 34}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 34]}}" + } + } + nodes { + children { + node_id: 12 + local_name: "kernel" + } + children { + node_id: 13 + local_name: "bias" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 134 + local_name: "keras_api" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 40, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 100]}}" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + children { + node_id: 26 + local_name: "12" + } + children { + node_id: 57 + local_name: "13" + } + children { + node_id: 58 + local_name: "14" + } + children { + node_id: 59 + local_name: "15" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + 
identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 135 + local_name: "layer_metrics" + } + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 136 + local_name: "layer_regularization_losses" + } + children { + node_id: 137 + local_name: "metrics" + } + children { + node_id: 138 + local_name: "layers" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 139 + local_name: "non_trainable_variables" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 225 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 140 + local_name: "layer_metrics" + } + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 141 + local_name: "layer_regularization_losses" + } + children { + node_id: 142 + local_name: "metrics" + } + children { + node_id: 143 + local_name: "layers" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 144 + local_name: "non_trainable_variables" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 227 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 145 + local_name: "layer_metrics" + } + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 146 + local_name: "layer_regularization_losses" + } + children { + node_id: 147 + local_name: "metrics" + } + children { + node_id: 148 
+ local_name: "layers" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 149 + local_name: "non_trainable_variables" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 229 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 150 + local_name: "layer_metrics" + } + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 151 + local_name: "layer_regularization_losses" + } + children { + node_id: 152 + local_name: "metrics" + } + children { + node_id: 153 + local_name: "layers" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 154 + local_name: "non_trainable_variables" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 231 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 155 + local_name: "layer_metrics" + } + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 156 + local_name: "layer_regularization_losses" + } + children { + node_id: 157 + local_name: "metrics" + } + children { + node_id: 158 + local_name: "layers" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 159 + local_name: "non_trainable_variables" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 233 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 160 + local_name: "layer_metrics" + } + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 161 + local_name: 
"layer_regularization_losses" + } + children { + node_id: 162 + local_name: "metrics" + } + children { + node_id: 163 + local_name: "layers" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 164 + local_name: "non_trainable_variables" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 235 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 165 + local_name: "layer_metrics" + } + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 166 + local_name: "layer_regularization_losses" + } + children { + node_id: 167 + local_name: "metrics" + } + children { + node_id: 168 + local_name: "layers" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 169 + local_name: "non_trainable_variables" + } + children { + node_id: 96 + local_name: "trainable_variables" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 237 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 170 + local_name: "layer_metrics" + } + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 171 + local_name: "layer_regularization_losses" + } + children { + node_id: 172 + local_name: "metrics" + } + children { + node_id: 173 + local_name: "layers" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 174 + local_name: "non_trainable_variables" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 239 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 175 + local_name: 
"layer_metrics" + } + children { + node_id: 102 + local_name: "variables" + } + children { + node_id: 176 + local_name: "layer_regularization_losses" + } + children { + node_id: 177 + local_name: "metrics" + } + children { + node_id: 178 + local_name: "layers" + } + children { + node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 179 + local_name: "non_trainable_variables" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 241 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 180 + local_name: "layer_metrics" + } + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 181 + local_name: "layer_regularization_losses" + } + children { + node_id: 182 + local_name: "metrics" + } + children { + node_id: 183 + local_name: "layers" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 184 + local_name: "non_trainable_variables" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 242 + local_name: "__call__" + } + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 243 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 185 + local_name: "layer_metrics" + } + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 186 + local_name: "layer_regularization_losses" + } + children { + node_id: 187 + local_name: "metrics" + } + children { + node_id: 188 + local_name: "layers" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 189 + local_name: "non_trainable_variables" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 245 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: 
"trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 190 + local_name: "layer_metrics" + } + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 191 + local_name: "layer_regularization_losses" + } + children { + node_id: 192 + local_name: "metrics" + } + children { + node_id: 193 + local_name: "layers" + } + children { + node_id: 115 + local_name: "regularization_losses" + } + children { + node_id: 194 + local_name: "non_trainable_variables" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 247 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 195 + local_name: "layer_metrics" + } + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 196 + local_name: "layer_regularization_losses" + } + children { + node_id: 197 + local_name: "metrics" + } + children { + node_id: 198 + local_name: "layers" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 199 + local_name: "non_trainable_variables" + } + children { + node_id: 125 + local_name: "trainable_variables" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 249 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 200 + local_name: "layer_metrics" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 201 + local_name: "layer_regularization_losses" + } + children { + 
node_id: 202 + local_name: "metrics" + } + children { + node_id: 203 + local_name: "layers" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 204 + local_name: "non_trainable_variables" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 251 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 205 + local_name: "layer_metrics" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 206 + local_name: "layer_regularization_losses" + } + children { + node_id: 207 + local_name: "metrics" + } + children { + node_id: 208 + local_name: "layers" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 209 + local_name: "non_trainable_variables" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 253 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + 
identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } 
+ } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + function { + concrete_functions: "__inference_polymorphic_action_fn_4619080" + concrete_functions: "__inference_polymorphic_action_fn_946" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "time_step" + } + values { + string_value: "policy_state" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + tuple_value { + values { + tuple_value { + } + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_function_722" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_<lambda>_728" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619026" + argument_keywords: "callee_basic_block_count" + argument_keywords: "callee_conditionally_executed_blocks" + argument_keywords: "callee_users" + argument_keywords: "caller_basic_block_count" + argument_keywords: "caller_conditionally_executed_blocks" + argument_keywords: "caller_users" + argument_keywords: "callsite_height" + argument_keywords: "cost_estimate" + argument_keywords: "discount" + argument_keywords: "edge_count" + argument_keywords: "inlining_default" + argument_keywords: "node_count" + argument_keywords: "nr_ctant_params" + argument_keywords: "reward" + argument_keywords: "step_type" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619033" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619048" + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + 
none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + 
values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + 
value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: 
true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + 
} + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { 
+ named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + 
key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + 
string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + 
values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + concrete_functions { + key: "__inference_<lambda>_728" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + } + dtype: DT_INT64 + } + } + } + 
} + concrete_functions { + key: "__inference_function_722" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_4619080" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { + name: "time_step/step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "time_step/reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "time_step/discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "time_step/observation/callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "time_step/observation/caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "time_step/observation/callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "time_step/observation/cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "time_step/observation/edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "time_step/observation/inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "time_step/observation/node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "time_step/observation/nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + 
key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_946" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + 
concrete_functions { + key: "__inference_signature_wrapper_4619026" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + } + } + } + } + output_signature { + dict_value { + fields { + key: "inlining_decision" + value { + tensor_spec_value { + name: "inlining_decision" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619033" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619048" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + fields { + key: "int64" + value { + tensor_spec_value { 
+ name: "int64" + shape { + } + dtype: DT_INT64 + } + } + } + } + } + } + } +} + diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 Binary files differ new file mode 100644 index 0000000000000..ee7d7060867e7 --- /dev/null +++ b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.index b/llvm/lib/Analysis/models/inliner/variables/variables.index Binary files differ new file mode 100644 index 0000000000000..7e0c10c1780e0 --- /dev/null +++ b/llvm/lib/Analysis/models/inliner/variables/variables.index
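
Note (not part of the patch): the generated SavedModel text above declares, via the __inference_signature_wrapper_4619026 concrete function, a serving signature that consumes one batch-of-one tensor per call-site feature (callee_basic_block_count, caller_users, callsite_height, inlining_default, and so on, plus the TimeStep fields step_type, reward and discount) and returns a single int64 "inlining_decision" tensor. The following is a minimal, hypothetical Python sketch of how such a model could be loaded and queried with TensorFlow outside the compiler; the model path and the "action" signature key are illustrative assumptions, not something this diff defines.

    import tensorflow as tf

    # Hypothetical path to the model directory added by this patch.
    MODEL_DIR = "llvm/lib/Analysis/models/inliner"

    policy = tf.saved_model.load(MODEL_DIR)
    # Assumed serving-signature key; inspect policy.signatures to confirm.
    action = policy.signatures["action"]

    INT64_FEATURES = [
        "callee_basic_block_count", "callee_conditionally_executed_blocks",
        "callee_users", "caller_basic_block_count",
        "caller_conditionally_executed_blocks", "caller_users",
        "callsite_height", "cost_estimate", "edge_count",
        "inlining_default", "node_count", "nr_ctant_params",
    ]

    # Every input is a batch-of-one tensor, mirroring the tensor_spec_value
    # entries in the canonicalized input signature above.
    inputs = {name: tf.constant([0], dtype=tf.int64) for name in INT64_FEATURES}
    inputs["step_type"] = tf.constant([0], dtype=tf.int32)
    inputs["reward"] = tf.constant([0.0], dtype=tf.float32)
    inputs["discount"] = tf.constant([0.0], dtype=tf.float32)

    # The returned dict carries the model's decision for this call site.
    print(action(**inputs)["inlining_decision"].numpy())

The same signature information can also be dumped without writing code by running TensorFlow's saved_model_cli tool, e.g. "saved_model_cli show --dir <model dir> --all".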