author    Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
commit    cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree      209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/Utils
parent    706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 246
-rw-r--r--  llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp | 618
-rw-r--r--  llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 295
-rw-r--r--  llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 90
-rw-r--r--  llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 201
-rw-r--r--  llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Utils/CallGraphUpdater.cpp | 167
-rw-r--r--  llvm/lib/Transforms/Utils/CallPromotionUtils.cpp | 231
-rw-r--r--  llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 250
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp | 29
-rw-r--r--  llvm/lib/Transforms/Utils/CodeExtractor.cpp | 227
-rw-r--r--  llvm/lib/Transforms/Utils/CodeMoverUtils.cpp | 281
-rw-r--r--  llvm/lib/Transforms/Utils/Debugify.cpp | 135
-rw-r--r--  llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/EscapeEnumerator.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/Evaluator.cpp | 49
-rw-r--r--  llvm/lib/Transforms/Utils/FixIrreducible.cpp | 337
-rw-r--r--  llvm/lib/Transforms/Utils/FlattenCFG.cpp | 128
-rw-r--r--  llvm/lib/Transforms/Utils/FunctionComparator.cpp | 107
-rw-r--r--  llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/GlobalStatus.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/InjectTLIMappings.cpp | 52
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 432
-rw-r--r--  llvm/lib/Transforms/Utils/InstructionNamer.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/LCSSA.cpp | 14
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 307
-rw-r--r--  llvm/lib/Transforms/Utils/LoopRotationUtils.cpp | 649
-rw-r--r--  llvm/lib/Transforms/Utils/LoopSimplify.cpp | 28
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnroll.cpp | 312
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 533
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp | 47
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 21
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUtils.cpp | 773
-rw-r--r--  llvm/lib/Transforms/Utils/LoopVersioning.cpp | 15
-rw-r--r--  llvm/lib/Transforms/Utils/LowerInvoke.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp | 166
-rw-r--r--  llvm/lib/Transforms/Utils/LowerSwitch.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Utils/ModuleUtils.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/PredicateInfo.cpp | 171
-rw-r--r--  llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/SSAUpdater.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 2569
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 361
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 34
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 422
-rw-r--r--  llvm/lib/Transforms/Utils/SizeOpts.cpp | 33
-rw-r--r--  llvm/lib/Transforms/Utils/StripGCRelocates.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/SymbolRewriter.cpp | 30
-rw-r--r--  llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 220
-rw-r--r--  llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp | 97
-rw-r--r--  llvm/lib/Transforms/Utils/Utils.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Utils/VNCoercion.cpp | 205
-rw-r--r--  llvm/lib/Transforms/Utils/ValueMapper.cpp | 16
54 files changed, 8811 insertions, 2193 deletions
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
new file mode 100644
index 0000000000000..84a66e1e96d2c
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,246 @@
+//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include <iostream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static bool isCString(const Value *Arg) {
+ auto Ty = Arg->getType();
+ auto PtrTy = dyn_cast<PointerType>(Ty);
+ if (!PtrTy)
+ return false;
+
+ auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
+ if (!IntTy)
+ return false;
+
+ return IntTy->getBitWidth() == 8;
+}
+
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Ty = Arg->getType();
+
+ if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
+ switch (IntTy->getBitWidth()) {
+ case 32:
+ return Builder.CreateZExt(Arg, Int64Ty);
+ case 64:
+ return Arg;
+ }
+ }
+
+ if (Ty->getTypeID() == Type::DoubleTyID) {
+ return Builder.CreateBitCast(Arg, Int64Ty);
+ }
+
+ if (isa<PointerType>(Ty)) {
+ return Builder.CreatePtrToInt(Arg, Int64Ty);
+ }
+
+ llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+ return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+ Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+ Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+ auto IsLastValue = Builder.getInt32(IsLast);
+ auto NumArgsValue = Builder.getInt32(NumArgs);
+ return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ auto Arg0 = fitArgInto64Bits(Builder, Arg);
+ auto Zero = Builder.getInt64(0);
+ return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+ Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
+ auto *Prev = Builder.GetInsertBlock();
+ Module *M = Prev->getModule();
+
+ auto CharZero = Builder.getInt8(0);
+ auto One = Builder.getInt64(1);
+ auto Zero = Builder.getInt64(0);
+ auto Int64Ty = Builder.getInt64Ty();
+
+ // The length is either zero for a null pointer, or the computed value for an
+ // actual string. We need a join block for a phi that represents the final
+ // value.
+ //
+ // Strictly speaking, the zero does not matter since
+ // __ockl_printf_append_string_n ignores the length if the pointer is null.
+ BasicBlock *Join = nullptr;
+ if (Prev->getTerminator()) {
+ Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
+ "strlen.join");
+ Prev->getTerminator()->eraseFromParent();
+ } else {
+ Join = BasicBlock::Create(M->getContext(), "strlen.join",
+ Prev->getParent());
+ }
+ BasicBlock *While =
+ BasicBlock::Create(M->getContext(), "strlen.while",
+ Prev->getParent(), Join);
+ BasicBlock *WhileDone = BasicBlock::Create(
+ M->getContext(), "strlen.while.done",
+ Prev->getParent(), Join);
+
+ // Emit an early return for when the pointer is null.
+ Builder.SetInsertPoint(Prev);
+ auto CmpNull =
+ Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
+ BranchInst::Create(Join, While, CmpNull, Prev);
+
+ // Entry to the while loop.
+ Builder.SetInsertPoint(While);
+
+ auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
+ PtrPhi->addIncoming(Str, Prev);
+ auto PtrNext = Builder.CreateGEP(PtrPhi, One);
+ PtrPhi->addIncoming(PtrNext, While);
+
+ // Condition for the while loop.
+ auto Data = Builder.CreateLoad(PtrPhi);
+ auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
+ Builder.CreateCondBr(Cmp, WhileDone, While);
+
+ // Add one to the computed length.
+ Builder.SetInsertPoint(WhileDone, WhileDone->begin());
+ auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
+ auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
+ auto Len = Builder.CreateSub(End, Begin);
+ Len = Builder.CreateAdd(Len, One);
+
+ // Final join.
+ BranchInst::Create(Join, WhileDone);
+ Builder.SetInsertPoint(Join, Join->begin());
+ auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
+ LenPhi->addIncoming(Len, WhileDone);
+ LenPhi->addIncoming(Zero, Prev);
+
+ return LenPhi;
+}
+
+static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
+ Value *Length, bool isLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto CharPtrTy = Builder.getInt8PtrTy();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
+ Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+ auto IsLastInt32 = Builder.getInt32(isLast);
+ return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
+}
+
+static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ auto Length = getStrlenWithNull(Builder, Arg);
+ return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
+}
+
+static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool SpecIsCString, bool IsLast) {
+ if (SpecIsCString && isCString(Arg)) {
+ return appendString(Builder, Desc, Arg, IsLast);
+ }
+  // If the format specifies a string but the argument is not one, the
+  // frontend will have printed a warning. We just rely on undefined behaviour
+  // and send the argument anyway.
+ return appendArg(Builder, Desc, Arg, IsLast);
+}
+
+// Scan the format string to locate all specifiers, and mark the ones that
+// specify a string, i.e., the "%s" specifier with optional '*' characters.
+static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
+ StringRef Str;
+ if (!getConstantStringInfo(Fmt, Str) || Str.empty())
+ return;
+
+ static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
+ size_t SpecPos = 0;
+ // Skip the first argument, the format string.
+ unsigned ArgIdx = 1;
+
+ while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
+ if (Str[SpecPos + 1] == '%') {
+ SpecPos += 2;
+ continue;
+ }
+ auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
+ if (SpecEnd == StringRef::npos)
+ return;
+ auto Spec = Str.slice(SpecPos, SpecEnd + 1);
+ ArgIdx += Spec.count('*');
+ if (Str[SpecEnd] == 's') {
+ BV.set(ArgIdx);
+ }
+ SpecPos = SpecEnd + 1;
+ ++ArgIdx;
+ }
+}
+
+Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
+ ArrayRef<Value *> Args) {
+ auto NumOps = Args.size();
+ assert(NumOps >= 1);
+
+ auto Fmt = Args[0];
+ SparseBitVector<8> SpecIsCString;
+ locateCStrings(SpecIsCString, Fmt);
+
+ auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
+ Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
+
+ // FIXME: This invokes hostcall once for each argument. We can pack up to
+ // seven scalar printf arguments in a single hostcall. See the signature of
+ // callAppendArgs().
+ for (unsigned int i = 1; i != NumOps; ++i) {
+ bool IsLast = i == NumOps - 1;
+ bool IsCString = SpecIsCString.test(i);
+ Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
+ }
+
+ return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
+}
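For context, here is a minimal sketch of how a frontend might drive the utility above. Only emitAMDGPUPrintfCall comes from this file; the wrapper name and argument values are hypothetical:

    // Sketch: lower printf("%d %s\n", X, Str) on AMDGPU, assuming Builder is
    // positioned at the original printf call site.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"

    using namespace llvm;

    Value *lowerPrintf(IRBuilder<> &Builder, Value *Fmt, Value *X, Value *Str) {
      // Args[0] is the format string. locateCStrings() marks the "%s" slot, so
      // Str is appended via __ockl_printf_append_string_n, while X is
      // zero-extended into a 64-bit slot of __ockl_printf_append_args.
      Value *Args[] = {Fmt, X, Str};
      // The i64 descriptor returned by the final hostcall is truncated to i32.
      return emitAMDGPUPrintfCall(Builder, Args);
    }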
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
new file mode 100644
index 0000000000000..7ff73fcdada79
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -0,0 +1,618 @@
+//===- AssumeBundleBuilder.cpp - tools to preserve informations -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "assume-builder"
+
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+cl::opt<bool> ShouldPreserveAllAttributes(
+ "assume-preserve-all", cl::init(false), cl::Hidden,
+ cl::desc("enable preservation of all attrbitues. even those that are "
+ "unlikely to be usefull"));
+
+cl::opt<bool> EnableKnowledgeRetention(
+ "enable-knowledge-retention", cl::init(false), cl::Hidden,
+ cl::desc(
+ "enable preservation of attributes throughout code transformation"));
+
+STATISTIC(NumAssumeBuilt, "Number of assumes built by the assume builder");
+STATISTIC(NumBundlesInAssumes, "Total number of bundles in the assumes built");
+STATISTIC(NumAssumesMerged,
+          "Number of assumes merged by the assume simplify pass");
+STATISTIC(NumAssumesRemoved,
+          "Number of assumes removed by the assume simplify pass");
+
+DEBUG_COUNTER(BuildAssumeCounter, "assume-builder-counter",
+              "Controls which assumes get created");
+
+namespace {
+
+bool isUsefullToPreserve(Attribute::AttrKind Kind) {
+ switch (Kind) {
+ case Attribute::NonNull:
+ case Attribute::Alignment:
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull:
+ case Attribute::Cold:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// This function will try to transform the given knowledge into a more
+/// canonical one. The canonical knowledge may be the given one.
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
+ switch (RK.AttrKind) {
+ default:
+ return RK;
+ case Attribute::NonNull:
+ RK.WasOn = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ return RK;
+ case Attribute::Alignment: {
+ Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
+ if (auto *GEP = dyn_cast<GEPOperator>(Strip))
+ RK.ArgValue =
+ MinAlign(RK.ArgValue,
+ GEP->getMaxPreservedAlignment(M->getDataLayout()).value());
+ });
+ RK.WasOn = V;
+ return RK;
+ }
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull: {
+ int64_t Offset = 0;
+ Value *V = GetPointerBaseWithConstantOffset(
+ RK.WasOn, Offset, M->getDataLayout(), /*AllowNonInBounds*/ false);
+ if (Offset < 0)
+ return RK;
+ RK.ArgValue = RK.ArgValue + Offset;
+ RK.WasOn = V;
+ }
+ }
+ return RK;
+}
+
+/// This class contains all the knowledge that has been gathered while
+/// building an llvm.assume, and the functions to manipulate it.
+struct AssumeBuilderState {
+ Module *M;
+
+ using MapKey = std::pair<Value *, Attribute::AttrKind>;
+ SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
+ Instruction *InstBeingRemoved = nullptr;
+ AssumptionCache* AC = nullptr;
+ DominatorTree* DT = nullptr;
+
+ AssumeBuilderState(Module *M, Instruction *I = nullptr,
+ AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr)
+ : M(M), InstBeingRemoved(I), AC(AC), DT(DT) {}
+
+ bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) {
+ if (!InstBeingRemoved || !RK.WasOn)
+ return false;
+ bool HasBeenPreserved = false;
+ Use* ToUpdate = nullptr;
+ getKnowledgeForValue(
+ RK.WasOn, {RK.AttrKind}, AC,
+ [&](RetainedKnowledge RKOther, Instruction *Assume,
+ const CallInst::BundleOpInfo *Bundle) {
+ if (!isValidAssumeForContext(Assume, InstBeingRemoved, DT))
+ return false;
+ if (RKOther.ArgValue >= RK.ArgValue) {
+ HasBeenPreserved = true;
+ return true;
+ } else if (isValidAssumeForContext(InstBeingRemoved, Assume,
+ DT)) {
+ HasBeenPreserved = true;
+ IntrinsicInst *Intr = cast<IntrinsicInst>(Assume);
+ ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument];
+ return true;
+ }
+ return false;
+ });
+ if (ToUpdate)
+ ToUpdate->set(
+ ConstantInt::get(Type::getInt64Ty(M->getContext()), RK.ArgValue));
+ return HasBeenPreserved;
+ }
+
+ bool isKnowledgeWorthPreserving(RetainedKnowledge RK) {
+ if (!RK)
+ return false;
+ if (!RK.WasOn)
+ return true;
+ if (RK.WasOn->getType()->isPointerTy()) {
+ Value *UnderlyingPtr = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr))
+ return false;
+ }
+ if (auto *Arg = dyn_cast<Argument>(RK.WasOn)) {
+ if (Arg->hasAttribute(RK.AttrKind) &&
+ (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+ Arg->getAttribute(RK.AttrKind).getValueAsInt() >= RK.ArgValue))
+ return false;
+ return true;
+ }
+ if (auto *Inst = dyn_cast<Instruction>(RK.WasOn))
+ if (wouldInstructionBeTriviallyDead(Inst)) {
+ if (RK.WasOn->use_empty())
+ return false;
+ Use *SingleUse = RK.WasOn->getSingleUndroppableUse();
+ if (SingleUse && SingleUse->getUser() == InstBeingRemoved)
+ return false;
+ }
+ return true;
+ }
+
+ void addKnowledge(RetainedKnowledge RK) {
+ RK = canonicalizedKnowledge(RK, M);
+
+ if (!isKnowledgeWorthPreserving(RK))
+ return;
+
+ if (tryToPreserveWithoutAddingAssume(RK))
+ return;
+ MapKey Key{RK.WasOn, RK.AttrKind};
+ auto Lookup = AssumedKnowledgeMap.find(Key);
+ if (Lookup == AssumedKnowledgeMap.end()) {
+ AssumedKnowledgeMap[Key] = RK.ArgValue;
+ return;
+ }
+ assert(((Lookup->second == 0 && RK.ArgValue == 0) ||
+ (Lookup->second != 0 && RK.ArgValue != 0)) &&
+ "inconsistent argument value");
+
+    /// This is only desirable because, for all attributes taking an argument,
+    /// higher is better.
+ Lookup->second = std::max(Lookup->second, RK.ArgValue);
+ }
+
+ void addAttribute(Attribute Attr, Value *WasOn) {
+ if (Attr.isTypeAttribute() || Attr.isStringAttribute() ||
+ (!ShouldPreserveAllAttributes &&
+ !isUsefullToPreserve(Attr.getKindAsEnum())))
+ return;
+ unsigned AttrArg = 0;
+ if (Attr.isIntAttribute())
+ AttrArg = Attr.getValueAsInt();
+ addKnowledge({Attr.getKindAsEnum(), AttrArg, WasOn});
+ }
+
+ void addCall(const CallBase *Call) {
+ auto addAttrList = [&](AttributeList AttrList) {
+ for (unsigned Idx = AttributeList::FirstArgIndex;
+ Idx < AttrList.getNumAttrSets(); Idx++)
+ for (Attribute Attr : AttrList.getAttributes(Idx))
+ addAttribute(Attr, Call->getArgOperand(Idx - 1));
+ for (Attribute Attr : AttrList.getFnAttributes())
+ addAttribute(Attr, nullptr);
+ };
+ addAttrList(Call->getAttributes());
+ if (Function *Fn = Call->getCalledFunction())
+ addAttrList(Fn->getAttributes());
+ }
+
+ IntrinsicInst *build() {
+ if (AssumedKnowledgeMap.empty())
+ return nullptr;
+ if (!DebugCounter::shouldExecute(BuildAssumeCounter))
+ return nullptr;
+ Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
+ LLVMContext &C = M->getContext();
+ SmallVector<OperandBundleDef, 8> OpBundle;
+ for (auto &MapElem : AssumedKnowledgeMap) {
+ SmallVector<Value *, 2> Args;
+ if (MapElem.first.first)
+ Args.push_back(MapElem.first.first);
+
+      /// This is only valid because, for all attributes that currently exist,
+      /// a value of 0 is useless and should not be preserved.
+ if (MapElem.second)
+ Args.push_back(ConstantInt::get(Type::getInt64Ty(M->getContext()),
+ MapElem.second));
+ OpBundle.push_back(OperandBundleDefT<Value *>(
+ std::string(Attribute::getNameFromAttrKind(MapElem.first.second)),
+ Args));
+ NumBundlesInAssumes++;
+ }
+ NumAssumeBuilt++;
+ return cast<IntrinsicInst>(CallInst::Create(
+ FnAssume, ArrayRef<Value *>({ConstantInt::getTrue(C)}), OpBundle));
+ }
+
+ void addAccessedPtr(Instruction *MemInst, Value *Pointer, Type *AccType,
+ MaybeAlign MA) {
+ unsigned DerefSize = MemInst->getModule()
+ ->getDataLayout()
+ .getTypeStoreSize(AccType)
+ .getKnownMinSize();
+ if (DerefSize != 0) {
+ addKnowledge({Attribute::Dereferenceable, DerefSize, Pointer});
+ if (!NullPointerIsDefined(MemInst->getFunction(),
+ Pointer->getType()->getPointerAddressSpace()))
+ addKnowledge({Attribute::NonNull, 0u, Pointer});
+ }
+ if (MA.valueOrOne() > 1)
+ addKnowledge(
+ {Attribute::Alignment, unsigned(MA.valueOrOne().value()), Pointer});
+ }
+
+ void addInstruction(Instruction *I) {
+ if (auto *Call = dyn_cast<CallBase>(I))
+ return addCall(Call);
+ if (auto *Load = dyn_cast<LoadInst>(I))
+ return addAccessedPtr(I, Load->getPointerOperand(), Load->getType(),
+ Load->getAlign());
+ if (auto *Store = dyn_cast<StoreInst>(I))
+ return addAccessedPtr(I, Store->getPointerOperand(),
+ Store->getValueOperand()->getType(),
+ Store->getAlign());
+ // TODO: Add support for the other Instructions.
+ // TODO: Maybe we should look around and merge with other llvm.assume.
+ }
+};
+
+} // namespace
+
+IntrinsicInst *llvm::buildAssumeFromInst(Instruction *I) {
+ if (!EnableKnowledgeRetention)
+ return nullptr;
+ AssumeBuilderState Builder(I->getModule());
+ Builder.addInstruction(I);
+ return Builder.build();
+}
+
+void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (!EnableKnowledgeRetention || I->isTerminator())
+ return;
+ AssumeBuilderState Builder(I->getModule(), I, AC, DT);
+ Builder.addInstruction(I);
+ if (IntrinsicInst *Intr = Builder.build()) {
+ Intr->insertBefore(I);
+ if (AC)
+ AC->registerAssumption(Intr);
+ }
+}
+
+namespace {
+
+struct AssumeSimplify {
+ Function &F;
+ AssumptionCache &AC;
+ DominatorTree *DT;
+ LLVMContext &C;
+ SmallDenseSet<IntrinsicInst *> CleanupToDo;
+ StringMapEntry<uint32_t> *IgnoreTag;
+ SmallDenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 4>, 8> BBToAssume;
+ bool MadeChange = false;
+
+ AssumeSimplify(Function &F, AssumptionCache &AC, DominatorTree *DT,
+ LLVMContext &C)
+ : F(F), AC(AC), DT(DT), C(C),
+ IgnoreTag(C.getOrInsertBundleTag(IgnoreBundleTag)) {}
+
+ void buildMapping(bool FilterBooleanArgument) {
+ BBToAssume.clear();
+ for (Value *V : AC.assumptions()) {
+ if (!V)
+ continue;
+ IntrinsicInst *Assume = cast<IntrinsicInst>(V);
+ if (FilterBooleanArgument) {
+ auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
+ if (!Arg || Arg->isZero())
+ continue;
+ }
+ BBToAssume[Assume->getParent()].push_back(Assume);
+ }
+
+ for (auto &Elem : BBToAssume) {
+ llvm::sort(Elem.second,
+ [](const IntrinsicInst *LHS, const IntrinsicInst *RHS) {
+ return LHS->comesBefore(RHS);
+ });
+ }
+ }
+
+  /// Remove all assumes in CleanupToDo if their boolean argument is true and
+  /// ForceCleanup is set, or if the assume doesn't hold valuable knowledge.
+ void RunCleanup(bool ForceCleanup) {
+ for (IntrinsicInst *Assume : CleanupToDo) {
+ auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
+ if (!Arg || Arg->isZero() ||
+ (!ForceCleanup && !isAssumeWithEmptyBundle(*Assume)))
+ continue;
+ MadeChange = true;
+ if (ForceCleanup)
+ NumAssumesMerged++;
+ else
+ NumAssumesRemoved++;
+ Assume->eraseFromParent();
+ }
+ CleanupToDo.clear();
+ }
+
+  /// Remove knowledge stored in an assume when it is already known by an
+  /// attribute or another assume. When valid, this can instead update the
+  /// existing knowledge in an attribute or another assume.
+ void dropRedundantKnowledge() {
+ struct MapValue {
+ IntrinsicInst *Assume;
+ unsigned ArgValue;
+ CallInst::BundleOpInfo *BOI;
+ };
+ buildMapping(false);
+ SmallDenseMap<std::pair<Value *, Attribute::AttrKind>,
+ SmallVector<MapValue, 2>, 16>
+ Knowledge;
+ for (BasicBlock *BB : depth_first(&F))
+ for (Value *V : BBToAssume[BB]) {
+ if (!V)
+ continue;
+ IntrinsicInst *Assume = cast<IntrinsicInst>(V);
+ for (CallInst::BundleOpInfo &BOI : Assume->bundle_op_infos()) {
+ auto RemoveFromAssume = [&]() {
+ CleanupToDo.insert(Assume);
+ if (BOI.Begin != BOI.End) {
+ Use *U = &Assume->op_begin()[BOI.Begin + ABA_WasOn];
+ U->set(UndefValue::get(U->get()->getType()));
+ }
+ BOI.Tag = IgnoreTag;
+ };
+ if (BOI.Tag == IgnoreTag) {
+ CleanupToDo.insert(Assume);
+ continue;
+ }
+ RetainedKnowledge RK = getKnowledgeFromBundle(*Assume, BOI);
+ if (auto *Arg = dyn_cast_or_null<Argument>(RK.WasOn)) {
+ bool HasSameKindAttr = Arg->hasAttribute(RK.AttrKind);
+ if (HasSameKindAttr)
+ if (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+ Arg->getAttribute(RK.AttrKind).getValueAsInt() >=
+ RK.ArgValue) {
+ RemoveFromAssume();
+ continue;
+ }
+ if (isValidAssumeForContext(
+ Assume, &*F.getEntryBlock().getFirstInsertionPt()) ||
+ Assume == &*F.getEntryBlock().getFirstInsertionPt()) {
+ if (HasSameKindAttr)
+ Arg->removeAttr(RK.AttrKind);
+ Arg->addAttr(Attribute::get(C, RK.AttrKind, RK.ArgValue));
+ MadeChange = true;
+ RemoveFromAssume();
+ continue;
+ }
+ }
+ auto &Lookup = Knowledge[{RK.WasOn, RK.AttrKind}];
+ for (MapValue &Elem : Lookup) {
+ if (!isValidAssumeForContext(Elem.Assume, Assume, DT))
+ continue;
+ if (Elem.ArgValue >= RK.ArgValue) {
+ RemoveFromAssume();
+ continue;
+ } else if (isValidAssumeForContext(Assume, Elem.Assume, DT)) {
+ Elem.Assume->op_begin()[Elem.BOI->Begin + ABA_Argument].set(
+ ConstantInt::get(Type::getInt64Ty(C), RK.ArgValue));
+ MadeChange = true;
+ RemoveFromAssume();
+ continue;
+ }
+ }
+ Lookup.push_back({Assume, RK.ArgValue, &BOI});
+ }
+ }
+ }
+
+ using MergeIterator = SmallVectorImpl<IntrinsicInst *>::iterator;
+
+  /// Merge all Assumes from Begin to End and insert the resulting assume as
+  /// high as possible in the basic block.
+ void mergeRange(BasicBlock *BB, MergeIterator Begin, MergeIterator End) {
+ if (Begin == End || std::next(Begin) == End)
+ return;
+ /// Provide no additional information so that AssumeBuilderState doesn't
+ /// try to do any punning since it already has been done better.
+ AssumeBuilderState Builder(F.getParent());
+
+ /// For now it is initialized to the best value it could have
+ Instruction *InsertPt = BB->getFirstNonPHI();
+ if (isa<LandingPadInst>(InsertPt))
+ InsertPt = InsertPt->getNextNode();
+ for (IntrinsicInst *I : make_range(Begin, End)) {
+ CleanupToDo.insert(I);
+ for (CallInst::BundleOpInfo &BOI : I->bundle_op_infos()) {
+ RetainedKnowledge RK = getKnowledgeFromBundle(*I, BOI);
+ if (!RK)
+ continue;
+ Builder.addKnowledge(RK);
+ if (auto *I = dyn_cast_or_null<Instruction>(RK.WasOn))
+ if (I->getParent() == InsertPt->getParent() &&
+ (InsertPt->comesBefore(I) || InsertPt == I))
+ InsertPt = I->getNextNode();
+ }
+ }
+
+ /// Adjust InsertPt if it is before Begin, since mergeAssumes only
+ /// guarantees we can place the resulting assume between Begin and End.
+ if (InsertPt->comesBefore(*Begin))
+ for (auto It = (*Begin)->getIterator(), E = InsertPt->getIterator();
+ It != E; --It)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) {
+ InsertPt = It->getNextNode();
+ break;
+ }
+ IntrinsicInst *MergedAssume = Builder.build();
+ if (!MergedAssume)
+ return;
+ MadeChange = true;
+ MergedAssume->insertBefore(InsertPt);
+ AC.registerAssumption(MergedAssume);
+ }
+
+  /// Merge assumes when they are in the same BasicBlock and every instruction
+  /// between them satisfies isGuaranteedToTransferExecutionToSuccessor.
+ void mergeAssumes() {
+ buildMapping(true);
+
+ SmallVector<MergeIterator, 4> SplitPoints;
+ for (auto &Elem : BBToAssume) {
+ SmallVectorImpl<IntrinsicInst *> &AssumesInBB = Elem.second;
+ if (AssumesInBB.size() < 2)
+ continue;
+ /// AssumesInBB is already sorted by order in the block.
+
+ BasicBlock::iterator It = AssumesInBB.front()->getIterator();
+ BasicBlock::iterator E = AssumesInBB.back()->getIterator();
+ SplitPoints.push_back(AssumesInBB.begin());
+ MergeIterator LastSplit = AssumesInBB.begin();
+ for (; It != E; ++It)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) {
+ for (; (*LastSplit)->comesBefore(&*It); ++LastSplit)
+ ;
+ if (SplitPoints.back() != LastSplit)
+ SplitPoints.push_back(LastSplit);
+ }
+ SplitPoints.push_back(AssumesInBB.end());
+ for (auto SplitIt = SplitPoints.begin();
+ SplitIt != std::prev(SplitPoints.end()); SplitIt++) {
+ mergeRange(Elem.first, *SplitIt, *(SplitIt + 1));
+ }
+ SplitPoints.clear();
+ }
+ }
+};
+
+bool simplifyAssumes(Function &F, AssumptionCache *AC, DominatorTree *DT) {
+ AssumeSimplify AS(F, *AC, DT, F.getContext());
+
+  /// Remove knowledge that is already known by another dominating assume or
+  /// an attribute.
+ AS.dropRedundantKnowledge();
+
+  /// Remove assumes that are empty.
+ AS.RunCleanup(false);
+
+  /// Merge assumes in the same basic block when possible.
+ AS.mergeAssumes();
+
+  /// Remove assumes that were merged.
+ AS.RunCleanup(true);
+ return AS.MadeChange;
+}
+
+} // namespace
+
+PreservedAnalyses AssumeSimplifyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!EnableKnowledgeRetention)
+ return PreservedAnalyses::all();
+ simplifyAssumes(F, &AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F));
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class AssumeSimplifyPassLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ AssumeSimplifyPassLegacyPass() : FunctionPass(ID) {
+ initializeAssumeSimplifyPassLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F) || !EnableKnowledgeRetention)
+ return false;
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ return simplifyAssumes(F, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ AU.setPreservesAll();
+ }
+};
+} // namespace
+
+char AssumeSimplifyPassLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AssumeSimplifyPassLegacyPass, "assume-simplify",
+ "Assume Simplify", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(AssumeSimplifyPassLegacyPass, "assume-simplify",
+ "Assume Simplify", false, false)
+
+FunctionPass *llvm::createAssumeSimplifyPass() {
+ return new AssumeSimplifyPassLegacyPass();
+}
+
+PreservedAnalyses AssumeBuilderPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree* DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ for (Instruction &I : instructions(F))
+ salvageKnowledge(&I, AC, DT);
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class AssumeBuilderPassLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ AssumeBuilderPassLegacyPass() : FunctionPass(ID) {
+ initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ for (Instruction &I : instructions(F))
+ salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ AU.setPreservesAll();
+ }
+};
+} // namespace
+
+char AssumeBuilderPassLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder",
+ "Assume Builder", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder",
+ "Assume Builder", false, false)
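As a usage sketch of the builder above (the pass context, i.e. DeadLoad, AC, and DT, is hypothetical; salvageKnowledge is the entry point defined in this file, and it is a no-op unless -enable-knowledge-retention is set):

    // Sketch: preserve the facts implied by a load before deleting it.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

    using namespace llvm;

    void removeLoadButKeepKnowledge(LoadInst *DeadLoad, AssumptionCache *AC,
                                    DominatorTree *DT) {
      // For an align-4 i32 load of %p this inserts something like
      //   call void @llvm.assume(i1 true)
      //       ["dereferenceable"(i32* %p, i64 4), "align"(i32* %p, i64 4)]
      // before DeadLoad, so the knowledge survives the deletion below.
      salvageKnowledge(DeadLoad, AC, DT);
      DeadLoad->eraseFromParent();
    }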
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c9eb4abfa21ae..085d91031cf90 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,7 +153,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
}
}
-bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
SmallVector<WeakTrackingVH, 8> PHIs;
@@ -163,7 +164,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
- Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI, MSSAU);
return Changed;
}
@@ -314,6 +315,31 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
return true;
}
+bool llvm::MergeBlockSuccessorsIntoGivenBlocks(
+ SmallPtrSetImpl<BasicBlock *> &MergeBlocks, Loop *L, DomTreeUpdater *DTU,
+ LoopInfo *LI) {
+ assert(!MergeBlocks.empty() && "MergeBlocks should not be empty");
+
+ bool BlocksHaveBeenMerged = false;
+ while (!MergeBlocks.empty()) {
+ BasicBlock *BB = *MergeBlocks.begin();
+ BasicBlock *Dest = BB->getSingleSuccessor();
+ if (Dest && (!L || L->contains(Dest))) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ (void)Fold;
+ if (MergeBlockIntoPredecessor(Dest, DTU, LI)) {
+ assert(Fold == BB &&
+ "Expecting BB to be unique predecessor of the Dest block");
+ MergeBlocks.erase(Dest);
+ BlocksHaveBeenMerged = true;
+ } else
+ MergeBlocks.erase(BB);
+ } else
+ MergeBlocks.erase(BB);
+ }
+ return BlocksHaveBeenMerged;
+}
+
/// Remove redundant instructions within sequences of consecutive dbg.value
/// instructions. This is done using a backward scan to keep the last dbg.value
/// describing a specific variable/fragment.
@@ -505,7 +531,8 @@ llvm::SplitAllCriticalEdges(Function &F,
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) &&
+ !isa<CallBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
@@ -900,9 +927,25 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
Pred->getInstList().insert(NewRet->getIterator(), NewBC);
*i = NewBC;
}
+
+ Instruction *NewEV = nullptr;
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ V = EVI->getOperand(0);
+ NewEV = EVI->clone();
+ if (NewBC) {
+ NewBC->setOperand(0, NewEV);
+ Pred->getInstList().insert(NewBC->getIterator(), NewEV);
+ } else {
+ Pred->getInstList().insert(NewRet->getIterator(), NewEV);
+ *i = NewEV;
+ }
+ }
+
if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (PN->getParent() == BB) {
- if (NewBC)
+ if (NewEV) {
+ NewEV->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ } else if (NewBC)
NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
else
*i = PN->getIncomingValueForBlock(Pred);
@@ -1084,3 +1127,247 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
}
return BI->getCondition();
}
+
+// After creating a control flow hub, the operands of PHINodes in an outgoing
+// block Out no longer match the predecessors of that block. Predecessors of Out
+// that are incoming blocks to the hub are now replaced by just one edge from
+// the hub. To match this new control flow, the corresponding values from each
+// PHINode must now be moved to a new PHINode in the first guard block of the hub.
+//
+// This operation cannot be performed with SSAUpdater, because it involves one
+// new use: If the block Out is in the list of Incoming blocks, then the newly
+// created PHI in the Hub will use itself along that edge from Out to Hub.
+static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock,
+ const SetVector<BasicBlock *> &Incoming,
+ BasicBlock *FirstGuardBlock) {
+ auto I = Out->begin();
+ while (I != Out->end() && isa<PHINode>(I)) {
+ auto Phi = cast<PHINode>(I);
+ auto NewPhi =
+ PHINode::Create(Phi->getType(), Incoming.size(),
+ Phi->getName() + ".moved", &FirstGuardBlock->back());
+ for (auto In : Incoming) {
+ Value *V = UndefValue::get(Phi->getType());
+ if (In == Out) {
+ V = NewPhi;
+ } else if (Phi->getBasicBlockIndex(In) != -1) {
+ V = Phi->removeIncomingValue(In, false);
+ }
+ NewPhi->addIncoming(V, In);
+ }
+ assert(NewPhi->getNumIncomingValues() == Incoming.size());
+ if (Phi->getNumOperands() == 0) {
+ Phi->replaceAllUsesWith(NewPhi);
+ I = Phi->eraseFromParent();
+ continue;
+ }
+ Phi->addIncoming(NewPhi, GuardBlock);
+ ++I;
+ }
+}
+
+using BBPredicates = DenseMap<BasicBlock *, PHINode *>;
+using BBSetVector = SetVector<BasicBlock *>;
+
+// Redirects the terminator of the incoming block to the first guard
+// block in the hub. The condition of the original terminator (if it
+// was conditional) and its original successors are returned as a
+// tuple <condition, succ0, succ1>. The function additionally filters
+// out successors that are not in the set of outgoing blocks.
+//
+// - condition is non-null iff the branch is conditional.
+// - Succ0 is non-null iff the sole/taken target is an outgoing block.
+// - Succ1 is non-null iff condition is non-null and the fallthrough
+// target is an outgoing block.
+static std::tuple<Value *, BasicBlock *, BasicBlock *>
+redirectToHub(BasicBlock *BB, BasicBlock *FirstGuardBlock,
+ const BBSetVector &Outgoing) {
+ auto Branch = cast<BranchInst>(BB->getTerminator());
+ auto Condition = Branch->isConditional() ? Branch->getCondition() : nullptr;
+
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ BasicBlock *Succ1 = nullptr;
+ Succ0 = Outgoing.count(Succ0) ? Succ0 : nullptr;
+
+ if (Branch->isUnconditional()) {
+ Branch->setSuccessor(0, FirstGuardBlock);
+ assert(Succ0);
+ } else {
+ Succ1 = Branch->getSuccessor(1);
+ Succ1 = Outgoing.count(Succ1) ? Succ1 : nullptr;
+ assert(Succ0 || Succ1);
+ if (Succ0 && !Succ1) {
+ Branch->setSuccessor(0, FirstGuardBlock);
+ } else if (Succ1 && !Succ0) {
+ Branch->setSuccessor(1, FirstGuardBlock);
+ } else {
+ Branch->eraseFromParent();
+ BranchInst::Create(FirstGuardBlock, BB);
+ }
+ }
+
+ assert(Succ0 || Succ1);
+ return std::make_tuple(Condition, Succ0, Succ1);
+}
+
+// Capture the existing control flow as guard predicates, and redirect
+// control flow from every incoming block to the first guard block in
+// the hub.
+//
+// There is one guard predicate for each outgoing block OutBB. The
+// predicate is a PHINode with one input for each InBB which
+// represents whether the hub should transfer control flow to OutBB if
+// it arrived from InBB. These predicates are NOT ORTHOGONAL. The Hub
+// evaluates them in the same order as the Outgoing set-vector, and
+// control branches to the first outgoing block whose predicate
+// evaluates to true.
+static void convertToGuardPredicates(
+ BasicBlock *FirstGuardBlock, BBPredicates &GuardPredicates,
+ SmallVectorImpl<WeakVH> &DeletionCandidates, const BBSetVector &Incoming,
+ const BBSetVector &Outgoing) {
+ auto &Context = Incoming.front()->getContext();
+ auto BoolTrue = ConstantInt::getTrue(Context);
+ auto BoolFalse = ConstantInt::getFalse(Context);
+
+ // The predicate for the last outgoing is trivially true, and so we
+ // process only the first N-1 successors.
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ LLVM_DEBUG(dbgs() << "Creating guard for " << Out->getName() << "\n");
+ auto Phi =
+ PHINode::Create(Type::getInt1Ty(Context), Incoming.size(),
+ StringRef("Guard.") + Out->getName(), FirstGuardBlock);
+ GuardPredicates[Out] = Phi;
+ }
+
+ for (auto In : Incoming) {
+ Value *Condition;
+ BasicBlock *Succ0;
+ BasicBlock *Succ1;
+ std::tie(Condition, Succ0, Succ1) =
+ redirectToHub(In, FirstGuardBlock, Outgoing);
+
+ // Optimization: Consider an incoming block A with both successors
+ // Succ0 and Succ1 in the set of outgoing blocks. The predicates
+ // for Succ0 and Succ1 complement each other. If Succ0 is visited
+ // first in the loop below, control will branch to Succ0 using the
+ // corresponding predicate. But if that branch is not taken, then
+ // control must reach Succ1, which means that the predicate for
+ // Succ1 is always true.
+ bool OneSuccessorDone = false;
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ auto Phi = GuardPredicates[Out];
+ if (Out != Succ0 && Out != Succ1) {
+ Phi->addIncoming(BoolFalse, In);
+ continue;
+ }
+ // Optimization: When only one successor is an outgoing block,
+ // the predicate is always true.
+ if (!Succ0 || !Succ1 || OneSuccessorDone) {
+ Phi->addIncoming(BoolTrue, In);
+ continue;
+ }
+ assert(Succ0 && Succ1);
+ OneSuccessorDone = true;
+ if (Out == Succ0) {
+ Phi->addIncoming(Condition, In);
+ continue;
+ }
+ auto Inverted = invertCondition(Condition);
+ DeletionCandidates.push_back(Condition);
+ Phi->addIncoming(Inverted, In);
+ }
+ }
+}
+
+// For each outgoing block OutBB, create a guard block in the Hub. The
+// first guard block was already created outside, and available as the
+// first element in the vector of guard blocks.
+//
+// Each guard block terminates in a conditional branch that transfers
+// control to the corresponding outgoing block or the next guard
+// block. The last guard block has two outgoing blocks as successors
+// since the condition for the final outgoing block is trivially
+// true. So we create one less block (including the first guard block)
+// than the number of outgoing blocks.
+static void createGuardBlocks(SmallVectorImpl<BasicBlock *> &GuardBlocks,
+ Function *F, const BBSetVector &Outgoing,
+ BBPredicates &GuardPredicates, StringRef Prefix) {
+ for (int i = 0, e = Outgoing.size() - 2; i != e; ++i) {
+ GuardBlocks.push_back(
+ BasicBlock::Create(F->getContext(), Prefix + ".guard", F));
+ }
+ assert(GuardBlocks.size() == GuardPredicates.size());
+
+ // To help keep the loop simple, temporarily append the last
+ // outgoing block to the list of guard blocks.
+ GuardBlocks.push_back(Outgoing.back());
+
+ for (int i = 0, e = GuardBlocks.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ assert(GuardPredicates.count(Out));
+ BranchInst::Create(Out, GuardBlocks[i + 1], GuardPredicates[Out],
+ GuardBlocks[i]);
+ }
+
+ // Remove the last block from the guard list.
+ GuardBlocks.pop_back();
+}
+
+BasicBlock *llvm::CreateControlFlowHub(
+ DomTreeUpdater *DTU, SmallVectorImpl<BasicBlock *> &GuardBlocks,
+ const BBSetVector &Incoming, const BBSetVector &Outgoing,
+ const StringRef Prefix) {
+ auto F = Incoming.front()->getParent();
+ auto FirstGuardBlock =
+ BasicBlock::Create(F->getContext(), Prefix + ".guard", F);
+
+ SmallVector<DominatorTree::UpdateType, 16> Updates;
+ if (DTU) {
+ for (auto In : Incoming) {
+ for (auto Succ : successors(In)) {
+ if (Outgoing.count(Succ))
+ Updates.push_back({DominatorTree::Delete, In, Succ});
+ }
+ Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
+ }
+ }
+
+ BBPredicates GuardPredicates;
+ SmallVector<WeakVH, 8> DeletionCandidates;
+ convertToGuardPredicates(FirstGuardBlock, GuardPredicates, DeletionCandidates,
+ Incoming, Outgoing);
+
+ GuardBlocks.push_back(FirstGuardBlock);
+ createGuardBlocks(GuardBlocks, F, Outgoing, GuardPredicates, Prefix);
+
+ // Update the PHINodes in each outgoing block to match the new control flow.
+ for (int i = 0, e = GuardBlocks.size(); i != e; ++i) {
+ reconnectPhis(Outgoing[i], GuardBlocks[i], Incoming, FirstGuardBlock);
+ }
+ reconnectPhis(Outgoing.back(), GuardBlocks.back(), Incoming, FirstGuardBlock);
+
+ if (DTU) {
+ int NumGuards = GuardBlocks.size();
+ assert((int)Outgoing.size() == NumGuards + 1);
+ for (int i = 0; i != NumGuards - 1; ++i) {
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[i], Outgoing[i]});
+ Updates.push_back(
+ {DominatorTree::Insert, GuardBlocks[i], GuardBlocks[i + 1]});
+ }
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
+ Outgoing[NumGuards - 1]});
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
+ Outgoing[NumGuards]});
+ DTU->applyUpdates(Updates);
+ }
+
+ for (auto I : DeletionCandidates) {
+ if (I->use_empty())
+ if (auto Inst = dyn_cast_or_null<Instruction>(I))
+ Inst->eraseFromParent();
+ }
+
+ return FirstGuardBlock;
+}
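A minimal sketch of calling the new hub utility (the wrapper and block names are hypothetical; note that redirectToHub casts each incoming terminator to BranchInst, so every incoming block must end in a branch):

    // Sketch: reroute In0/In1 through one hub that dispatches to Out0/Out1.
    #include "llvm/ADT/SetVector.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    void routeThroughHub(BasicBlock *In0, BasicBlock *In1, BasicBlock *Out0,
                         BasicBlock *Out1) {
      SetVector<BasicBlock *> Incoming, Outgoing;
      Incoming.insert(In0);
      Incoming.insert(In1);
      Outgoing.insert(Out0);
      Outgoing.insert(Out1);
      SmallVector<BasicBlock *, 4> GuardBlocks;
      // With two outgoing blocks a single guard block suffices: both In0 and
      // In1 now branch to it, and its "Guard.Out0" predicate PHI selects
      // between Out0 and Out1 (the last outgoing predicate is trivially true).
      CreateControlFlowHub(/*DTU=*/nullptr, GuardBlocks, Incoming, Outgoing,
                           "hub");
    }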
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 008cea333e6b3..39fb504cf7b75 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -150,14 +150,51 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// it in this generic function.
if (DestBB->isEHPad()) return nullptr;
- // Don't split the non-fallthrough edge from a callbr.
- if (isa<CallBrInst>(TI) && SuccNum > 0)
- return nullptr;
-
if (Options.IgnoreUnreachableDests &&
isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
return nullptr;
+ auto *LI = Options.LI;
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ // Check if extra modifications will be required to preserve loop-simplify
+ // form after splitting. If it would require splitting blocks with IndirectBr
+ // terminators, bail out if preserving loop-simplify form is requested.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+
+      // The only way that we can break LoopSimplify form by splitting a critical
+ // edge is if after the split there exists some edge from TIL to DestBB
+ // *and* the only edge into DestBB from outside of TIL is that of
+ // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
+ // is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then DestBB was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in TIL, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
+ ++I) {
+ BasicBlock *P = *I;
+ if (P == TIBB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != TIL) {
+ // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear();
+ break;
+ }
+ LoopPreds.push_back(P);
+ }
+ // Loop-simplify form can be preserved, if we can split all in-loop
+ // predecessors.
+ if (any_of(LoopPreds, [](BasicBlock *Pred) {
+ return isa<IndirectBrInst>(Pred->getTerminator());
+ })) {
+ if (Options.PreserveLoopSimplify)
+ return nullptr;
+ LoopPreds.clear();
+ }
+ }
+ }
+
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -165,14 +202,14 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
- // Branch to the new block, breaking the edge.
- TI->setSuccessor(SuccNum, NewBB);
-
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
Function::iterator FBBI = TIBB->getIterator();
F.getBasicBlockList().insert(++FBBI, NewBB);
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
{
@@ -212,7 +249,6 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
auto *PDT = Options.PDT;
- auto *LI = Options.LI;
auto *MSSAU = Options.MSSAU;
if (MSSAU)
MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
@@ -281,28 +317,6 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
}
- // The only that we can break LoopSimplify form by splitting a critical
- // edge is if after the split there exists some edge from TIL to DestBB
- // *and* the only edge into DestBB from outside of TIL is that of
- // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
- // is the new exit block and it has no non-loop predecessors. If the
- // second isn't true, then DestBB was not in LoopSimplify form prior to
- // the split as it had a non-loop predecessor. In both of these cases,
- // the predecessor must be directly in TIL, not in a subloop, or again
- // LoopSimplify doesn't hold.
- SmallVector<BasicBlock *, 4> LoopPreds;
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
- ++I) {
- BasicBlock *P = *I;
- if (P == NewBB)
- continue; // The new block is known.
- if (LI->getLoopFor(P) != TIL) {
- // No need to re-simplify, it wasn't to start with.
- LoopPreds.clear();
- break;
- }
- LoopPreds.push_back(P);
- }
if (!LoopPreds.empty()) {
assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
@@ -388,13 +402,20 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
if (FirstNonPHI->isEHPad() || Target->isLandingPad())
continue;
+ // Remember edge probabilities if needed.
+ SmallVector<BranchProbability, 4> EdgeProbabilities;
+ if (ShouldUpdateAnalysis) {
+ EdgeProbabilities.reserve(Target->getTerminator()->getNumSuccessors());
+ for (unsigned I = 0, E = Target->getTerminator()->getNumSuccessors();
+ I < E; ++I)
+ EdgeProbabilities.emplace_back(BPI->getEdgeProbability(Target, I));
+ BPI->eraseBlock(Target);
+ }
+
BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
if (ShouldUpdateAnalysis) {
// Copy the BFI/BPI from Target to BodyBlock.
- for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
- I < E; ++I)
- BPI->setEdgeProbability(BodyBlock, I,
- BPI->getEdgeProbability(Target, I));
+ BPI->setEdgeProbability(BodyBlock, EdgeProbabilities);
BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
}
// It's possible Target was its own successor through an indirectbr.
@@ -423,7 +444,6 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
BlockFrequency NewBlockFreqForTarget =
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
- BPI->eraseBlock(Target);
}
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
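From a caller's point of view, the loop-simplify handling added above looks like this (the wrapper is hypothetical; SplitCriticalEdge and CriticalEdgeSplittingOptions are the real entry points):

    // Sketch: split the edge to successor 0 while keeping DT and LI valid.
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    BasicBlock *splitFirstSuccEdge(Instruction *TI, DominatorTree *DT,
                                   LoopInfo *LI) {
      // With LI supplied, the code above checks up front whether loop-simplify
      // form can be preserved. If preserving it would require splitting an
      // indirectbr predecessor and preservation is requested, the call bails
      // out and returns nullptr instead of creating the new block.
      return SplitCriticalEdge(TI, /*SuccNum=*/0,
                               CriticalEdgeSplittingOptions(DT, LI));
    }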
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 71316ce8f7583..c64ad147fdfec 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -378,6 +378,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
+ case LibFunc_aligned_alloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
@@ -819,14 +823,14 @@ StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
//- Emit LibCalls ------------------------------------------------------------//
-Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
+Value *llvm::castToCStr(Value *V, IRBuilderBase &B) {
unsigned AS = V->getType()->getPointerAddressSpace();
return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
ArrayRef<Type *> ParamTypes,
- ArrayRef<Value *> Operands, IRBuilder<> &B,
+ ArrayRef<Value *> Operands, IRBuilderBase &B,
const TargetLibraryInfo *TLI,
bool IsVaArgs = false) {
if (!TLI->has(TheLibFunc))
@@ -844,20 +848,20 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
return CI;
}
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
}
-Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B,
+Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
castToCStr(Ptr, B), B, TLI);
}
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
@@ -865,7 +869,7 @@ Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
{castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
}
-Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
return emitLibCall(
@@ -874,28 +878,28 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
{castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
}
-Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
{castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
-Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
{castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
-Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
{castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
}
-Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
@@ -903,7 +907,7 @@ Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
}
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const DataLayout &DL,
+ IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_memcpy_chk))
return nullptr;
@@ -926,7 +930,7 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return CI;
}
-Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
return emitLibCall(
@@ -935,7 +939,7 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
{castToCStr(Ptr, B), Val, Len}, B, TLI);
}
-Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
return emitLibCall(
@@ -944,7 +948,7 @@ Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
{castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
}
-Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
return emitLibCall(
@@ -954,7 +958,7 @@ Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
}
Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return emitLibCall(
LibFunc_memccpy, B.getInt8PtrTy(),
{B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
@@ -962,7 +966,7 @@ Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
}
Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
- ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
@@ -972,7 +976,7 @@ Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
}
Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
- ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
@@ -981,28 +985,28 @@ Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
/*IsVaArgs=*/true);
}
-Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
{B.getInt8PtrTy(), B.getInt8PtrTy()},
{castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
}
-Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_strlcpy, Size->getType(),
{B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
{castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
}
-Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_strlcat, Size->getType(),
{B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
{castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
}
-Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
{B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
@@ -1010,7 +1014,7 @@ Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
}
Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return emitLibCall(
LibFunc_vsnprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
@@ -1018,7 +1022,7 @@ Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
}
Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
{castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
@@ -1040,7 +1044,7 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
- IRBuilder<> &B,
+ IRBuilderBase &B,
const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
@@ -1062,7 +1066,7 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
return CI;
}
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op, Name, NameBuffer);
@@ -1072,7 +1076,7 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn, IRBuilder<> &B,
+ LibFunc LongDoubleFn, IRBuilderBase &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
StringRef Name = getFloatFnName(TLI, Op->getType(),
@@ -1082,7 +1086,7 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
}
static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
- StringRef Name, IRBuilder<> &B,
+ StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
@@ -1105,7 +1109,8 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
}
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeList &Attrs) {
+ IRBuilderBase &B,
+ const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
SmallString<20> NameBuffer;
@@ -1117,7 +1122,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
const TargetLibraryInfo *TLI,
LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn, IRBuilder<> &B,
+ LibFunc LongDoubleFn, IRBuilderBase &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
StringRef Name = getFloatFnName(TLI, Op1->getType(),
@@ -1126,7 +1131,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
}
-Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
+Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_putchar))
return nullptr;
@@ -1149,7 +1154,7 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
return CI;
}
-Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
+Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_puts))
return nullptr;
@@ -1166,7 +1171,7 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
return CI;
}
-Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_fputc))
return nullptr;
@@ -1187,27 +1192,7 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
return CI;
}
-Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputc_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
- Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
- CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_fputs))
return nullptr;
@@ -1226,26 +1211,7 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
return CI;
}
-Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputs_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_fwrite))
return nullptr;
@@ -1269,7 +1235,7 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
return CI;
}
-Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
+Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_malloc))
return nullptr;
@@ -1290,7 +1256,7 @@ Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
}
Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilder<> &B, const TargetLibraryInfo &TLI) {
+ IRBuilderBase &B, const TargetLibraryInfo &TLI) {
if (!TLI.has(LibFunc_calloc))
return nullptr;
@@ -1309,88 +1275,3 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
return CI;
}
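// Illustrative caller sketch for the signature above (note this emitter
// takes TLI by reference and returns nullptr when the libcall is
// unavailable). Variable names are assumptions.
Value *Mem = emitCalloc(NumElts, EltSize, AttributeList(), B, *TLI);
if (!Mem)
  return false; // TLI says calloc is not available; bail out.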
-
-Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
- IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fwrite_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
- FunctionCallee F = M->getOrInsertFunction(
- FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
-
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fgetc_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
- File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fgets_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
- FunctionCallee F =
- M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
- inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
- CallInst *CI =
- B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
- IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fread_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
- FunctionCallee F = M->getOrInsertFunction(
- FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
-
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 9a6761040bd89..833d04210629d 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -213,9 +213,8 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
return false;
// Do not visit nodes that have been visited already. We return true because
// it means that we couldn't find any value that doesn't look hash-like.
- if (Visited.find(I) != Visited.end())
+ if (!Visited.insert(I).second)
return true;
- Visited.insert(I);
return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
// Ignore undef values as they probably don't affect the division
// operands.
@@ -264,6 +263,7 @@ QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
MainBB->getParent(), SuccessorBB);
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
@@ -287,6 +287,7 @@ QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
MainBB->getParent(), SuccessorBB);
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
@@ -312,6 +313,7 @@ QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
QuotRemWithBB &RHS,
BasicBlock *PhiBB) {
IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
@@ -328,6 +330,7 @@ QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
assert((Op1 || Op2) && "Nothing to check");
IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
Value *OrV;
if (Op1 && Op2)
@@ -396,6 +399,9 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
isa<ConstantInt>(BCI->getOperand(0)))
return None;
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+
if (DividendShort && !isSignedOp()) {
// If the division is unsigned and Dividend is known to be short, then
// either
@@ -418,7 +424,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
Long.Remainder = Dividend;
QuotRemWithBB Fast = createFastBB(SuccessorBB);
QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
- IRBuilder<> Builder(MainBB, MainBB->end());
Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
return Result;
@@ -435,7 +440,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
DivisorShort ? nullptr : Divisor);
- IRBuilder<> Builder(MainBB, MainBB->end());
Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
return Result;
}
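// Illustrative sketch, not from this patch: instructions created by an
// IRBuilder inherit its current debug location, which is why each builder
// above pins SlowDivOrRem's location once before emitting anything.
// (FastBB/SlowBB stand in for Fast.BB/Slow.BB.)
IRBuilder<> Builder(MainBB, MainBB->end());
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); // inherits DebugLoc
Builder.CreateCondBr(CmpV, FastBB, SlowBB);             // inherits DebugLoc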
diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
new file mode 100644
index 0000000000000..52e859361c598
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -0,0 +1,167 @@
+//===- CallGraphUpdater.cpp - A (lazy) call graph update helper -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides interfaces used to manipulate a call graph, regardless
+/// of whether it is an "old style" CallGraph or a "new style" LazyCallGraph.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+bool CallGraphUpdater::finalize() {
+ if (!DeadFunctionsInComdats.empty()) {
+ filterDeadComdatFunctions(*DeadFunctionsInComdats.front()->getParent(),
+ DeadFunctionsInComdats);
+ DeadFunctions.append(DeadFunctionsInComdats.begin(),
+ DeadFunctionsInComdats.end());
+ }
+
+ if (CG) {
+ // First remove all references, e.g., outgoing via called functions. This is
+ // necessary as we can delete functions that have circular references.
+ for (Function *DeadFn : DeadFunctions) {
+ DeadFn->removeDeadConstantUsers();
+ CallGraphNode *DeadCGN = (*CG)[DeadFn];
+ DeadCGN->removeAllCalledFunctions();
+ CG->getExternalCallingNode()->removeAnyCallEdgeTo(DeadCGN);
+ DeadFn->replaceAllUsesWith(UndefValue::get(DeadFn->getType()));
+ }
+
+ // Then remove the node and function from the module.
+ for (Function *DeadFn : DeadFunctions) {
+ CallGraphNode *DeadCGN = CG->getOrInsertFunction(DeadFn);
+ assert(DeadCGN->getNumReferences() == 0 &&
+ "References should have been handled by now");
+ delete CG->removeFunctionFromModule(DeadCGN);
+ }
+ } else {
+ // This is the code path for the new lazy call graph and for the case where
+ // no call graph was provided.
+ for (Function *DeadFn : DeadFunctions) {
+ DeadFn->removeDeadConstantUsers();
+ DeadFn->replaceAllUsesWith(UndefValue::get(DeadFn->getType()));
+
+ if (LCG && !ReplacedFunctions.count(DeadFn)) {
+ // Taken mostly from the inliner:
+ LazyCallGraph::Node &N = LCG->get(*DeadFn);
+ auto *DeadSCC = LCG->lookupSCC(N);
+ assert(DeadSCC && DeadSCC->size() == 1 &&
+ &DeadSCC->begin()->getFunction() == DeadFn);
+ auto &DeadRC = DeadSCC->getOuterRefSCC();
+
+ FunctionAnalysisManager &FAM =
+ AM->getResult<FunctionAnalysisManagerCGSCCProxy>(*DeadSCC, *LCG)
+ .getManager();
+
+ FAM.clear(*DeadFn, DeadFn->getName());
+ AM->clear(*DeadSCC, DeadSCC->getName());
+ LCG->removeDeadFunction(*DeadFn);
+
+ // Mark the relevant parts of the call graph as invalid so we don't
+ // visit them.
+ UR->InvalidatedSCCs.insert(DeadSCC);
+ UR->InvalidatedRefSCCs.insert(&DeadRC);
+ }
+
+ // The function is now really dead and detached from everything.
+ DeadFn->eraseFromParent();
+ }
+ }
+
+ bool Changed = !DeadFunctions.empty();
+ DeadFunctionsInComdats.clear();
+ DeadFunctions.clear();
+ return Changed;
+}
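// Hypothetical usage sketch for the class above; the initialize() overloads
// are assumed from CallGraphUpdater.h. Deletions are queued and only carried
// out by finalize().
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, SCC);       // old CallGraph (a LazyCallGraph
                                     // overload exists as well)
CGUpdater.removeFunction(*DeadFn);   // body dropped now, erasure deferred
bool Changed = CGUpdater.finalize(); // dead functions actually removed here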
+
+void CallGraphUpdater::reanalyzeFunction(Function &Fn) {
+ if (CG) {
+ CallGraphNode *OldCGN = CG->getOrInsertFunction(&Fn);
+ OldCGN->removeAllCalledFunctions();
+ CG->populateCallGraphNode(OldCGN);
+ } else if (LCG) {
+ LazyCallGraph::Node &N = LCG->get(Fn);
+ LazyCallGraph::SCC *C = LCG->lookupSCC(N);
+ updateCGAndAnalysisManagerForCGSCCPass(*LCG, *C, N, *AM, *UR, *FAM);
+ }
+}
+
+void CallGraphUpdater::registerOutlinedFunction(Function &NewFn) {
+ if (CG)
+ CG->addToCallGraph(&NewFn);
+ else if (LCG)
+ LCG->addNewFunctionIntoSCC(NewFn, *SCC);
+}
+
+void CallGraphUpdater::removeFunction(Function &DeadFn) {
+ DeadFn.deleteBody();
+ DeadFn.setLinkage(GlobalValue::ExternalLinkage);
+ if (DeadFn.hasComdat())
+ DeadFunctionsInComdats.push_back(&DeadFn);
+ else
+ DeadFunctions.push_back(&DeadFn);
+
+ // For the old call graph we remove the function from the SCC right away.
+ if (CG && !ReplacedFunctions.count(&DeadFn)) {
+ CallGraphNode *DeadCGN = (*CG)[&DeadFn];
+ DeadCGN->removeAllCalledFunctions();
+ CGSCC->DeleteNode(DeadCGN);
+ }
+}
+
+void CallGraphUpdater::replaceFunctionWith(Function &OldFn, Function &NewFn) {
+ OldFn.removeDeadConstantUsers();
+ ReplacedFunctions.insert(&OldFn);
+ if (CG) {
+ // Update the call graph for the newly promoted function.
+ CallGraphNode *OldCGN = (*CG)[&OldFn];
+ CallGraphNode *NewCGN = CG->getOrInsertFunction(&NewFn);
+ NewCGN->stealCalledFunctionsFrom(OldCGN);
+ CG->ReplaceExternalCallEdge(OldCGN, NewCGN);
+
+ // And update the SCC we're iterating as well.
+ CGSCC->ReplaceNode(OldCGN, NewCGN);
+ } else if (LCG) {
+ // Directly substitute the functions in the call graph.
+ LazyCallGraph::Node &OldLCGN = LCG->get(OldFn);
+ SCC->getOuterRefSCC().replaceNodeFunction(OldLCGN, NewFn);
+ }
+ removeFunction(OldFn);
+}
+
+bool CallGraphUpdater::replaceCallSite(CallBase &OldCS, CallBase &NewCS) {
+ // This is only necessary in the (old) CG.
+ if (!CG)
+ return true;
+
+ Function *Caller = OldCS.getCaller();
+ CallGraphNode *NewCalleeNode =
+ CG->getOrInsertFunction(NewCS.getCalledFunction());
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ if (llvm::none_of(*CallerNode, [&OldCS](const CallGraphNode::CallRecord &CR) {
+ return CR.first && *CR.first == &OldCS;
+ }))
+ return false;
+ CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ return true;
+}
+
+void CallGraphUpdater::removeCallSite(CallBase &CS) {
+ // This is only necessary in the (old) CG.
+ if (!CG)
+ return;
+
+ Function *Caller = CS.getCaller();
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->removeCallEdgeFor(CS);
+}
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index f04d76e70c0da..5a47c1fd0b6cb 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -12,7 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -158,32 +161,31 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
/// %t1 = bitcast i32 %t0 to ...
/// br label %normal_dst
///
-static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
+static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
// Save the users of the calling instruction. These uses will be changed to
// use the bitcast after we create it.
SmallVector<User *, 16> UsersToUpdate;
- for (User *U : CS.getInstruction()->users())
+ for (User *U : CB.users())
UsersToUpdate.push_back(U);
// Determine an appropriate location to create the bitcast for the return
// value. The location depends on whether we have a call or an invoke.
Instruction *InsertBefore = nullptr;
- if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
+ if (auto *Invoke = dyn_cast<InvokeInst>(&CB))
InsertBefore =
&SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
else
- InsertBefore = &*std::next(CS.getInstruction()->getIterator());
+ InsertBefore = &*std::next(CB.getIterator());
// Bitcast the return value to the correct type.
- auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "",
- InsertBefore);
+ auto *Cast = CastInst::CreateBitOrPointerCast(&CB, RetTy, "", InsertBefore);
if (RetBitCast)
*RetBitCast = Cast;
// Replace all the original uses of the calling instruction with the bitcast.
for (User *U : UsersToUpdate)
- U->replaceUsesOfWith(CS.getInstruction(), Cast);
+ U->replaceUsesOfWith(&CB, Cast);
}
/// Predicate and clone the given call site.
@@ -253,26 +255,91 @@ static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
/// br %normal_dst
///
-static Instruction *versionCallSite(CallSite CS, Value *Callee,
- MDNode *BranchWeights) {
-
- IRBuilder<> Builder(CS.getInstruction());
- Instruction *OrigInst = CS.getInstruction();
+/// An indirect musttail call is processed slightly differently in that:
+/// 1. No merge block is needed for the original and the cloned callsite, since
+/// either one ends the flow. No phi node is needed either.
+/// 2. The return statement following the original call site is duplicated too
+/// and placed immediately after the cloned call site per the IR convention.
+///
+/// For example, the musttail call instruction below:
+///
+/// orig_bb:
+/// %t0 = musttail call i32 %ptr()
+/// ...
+///
+/// Is replaced by the following:
+///
+/// cond_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, %then_bb, %orig_bb
+///
+/// then_bb:
+/// ; The clone of the original call instruction is placed in the "then"
+/// ; block. It is not yet promoted.
+/// %t1 = musttail call i32 %ptr()
+/// ret %t1
+///
+/// orig_bb:
+/// ; The original call instruction stays in its original block.
+/// %t0 = musttail call i32 %ptr()
+/// ret %t0
+static CallBase &versionCallSite(CallBase &CB, Value *Callee,
+ MDNode *BranchWeights) {
+
+ IRBuilder<> Builder(&CB);
+ CallBase *OrigInst = &CB;
BasicBlock *OrigBlock = OrigInst->getParent();
// Create the compare. The called value and callee must have the same type to
// be compared.
- if (CS.getCalledValue()->getType() != Callee->getType())
- Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType());
- auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee);
+ if (CB.getCalledOperand()->getType() != Callee->getType())
+ Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
+ auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
+
+ if (OrigInst->isMustTailCall()) {
+ // Create an if-then structure. The original instruction stays in its block,
+ // and a clone of the original instruction is placed in the "then" block.
+ Instruction *ThenTerm =
+ SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ ThenBlock->setName("if.true.direct_targ");
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ NewInst->insertBefore(ThenTerm);
+
+ // Place a clone of the optional bitcast after the new call site.
+ Value *NewRetVal = NewInst;
+ auto Next = OrigInst->getNextNode();
+ if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+ assert(BitCast->getOperand(0) == OrigInst &&
+ "bitcast following musttail call must use the call");
+ auto NewBitCast = BitCast->clone();
+ NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
+ NewBitCast->insertBefore(ThenTerm);
+ NewRetVal = NewBitCast;
+ Next = BitCast->getNextNode();
+ }
+
+ // Place a clone of the return instruction after the new call site.
+ ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+ assert(Ret && "musttail call must precede a ret with an optional bitcast");
+ auto NewRet = Ret->clone();
+ if (Ret->getReturnValue())
+ NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+ NewRet->insertBefore(ThenTerm);
+
+ // A return instruction is a terminator, so we don't need the terminator
+ // instruction just created.
+ ThenTerm->eraseFromParent();
+
+ return *NewInst;
+ }
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
Instruction *ThenTerm = nullptr;
Instruction *ElseTerm = nullptr;
- SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
- BranchWeights);
+ SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
BasicBlock *ElseBlock = ElseTerm->getParent();
BasicBlock *MergeBlock = OrigInst->getParent();
@@ -281,7 +348,7 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
ElseBlock->setName("if.false.orig_indirect");
MergeBlock->setName("if.end.icp");
- Instruction *NewInst = OrigInst->clone();
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
OrigInst->moveBefore(ElseTerm);
NewInst->insertBefore(ThenTerm);
@@ -313,18 +380,18 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
// Create a phi node for the returned value of the call site.
createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
- return NewInst;
+ return *NewInst;
}
-bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
+bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
const char **FailureReason) {
- assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
auto &DL = Callee->getParent()->getDataLayout();
// Check the return type. The callee's return value type must be bitcast
// compatible with the call site's type.
- Type *CallRetTy = CS.getInstruction()->getType();
+ Type *CallRetTy = CB.getType();
Type *FuncRetTy = Callee->getReturnType();
if (CallRetTy != FuncRetTy)
if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
@@ -336,9 +403,12 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
// The number of formal arguments of the callee.
unsigned NumParams = Callee->getFunctionType()->getNumParams();
+ // The number of actual arguments in the call.
+ unsigned NumArgs = CB.arg_size();
+
// Check the number of arguments. The callee and call site must agree on the
// number of arguments.
- if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
+ if (NumArgs != NumParams && !Callee->isVarArg()) {
if (FailureReason)
*FailureReason = "The number of arguments mismatch";
return false;
@@ -347,9 +417,10 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
// Check the argument types. The callee's formal argument types must be
// bitcast compatible with the corresponding actual argument types of the call
// site.
- for (unsigned I = 0; I < NumParams; ++I) {
+ unsigned I = 0;
+ for (; I < NumParams; ++I) {
Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
- Type *ActualTy = CS.getArgument(I)->getType();
+ Type *ActualTy = CB.getArgOperand(I)->getType();
if (FormalTy == ActualTy)
continue;
if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
@@ -358,35 +429,43 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
return false;
}
}
+ for (; I < NumArgs; I++) {
+ // Vararg functions can have more arguments than parameters.
+ assert(Callee->isVarArg());
+ if (CB.paramHasAttr(I, Attribute::StructRet)) {
+ if (FailureReason)
+ *FailureReason = "SRet arg to vararg function";
+ return false;
+ }
+ }
return true;
}
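// Illustrative caller sketch for the API above: query legality first and
// report the reason on failure (a DEBUG_TYPE is assumed to be defined).
const char *FailureReason = nullptr;
if (isLegalToPromote(CB, Callee, &FailureReason))
  promoteCall(CB, Callee);
else
  LLVM_DEBUG(dbgs() << "Cannot promote: " << FailureReason << "\n");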
-Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
- CastInst **RetBitCast) {
- assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
+ CastInst **RetBitCast) {
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
// Set the called function of the call site to be the given callee (but don't
// change the type).
- cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
+ CB.setCalledOperand(Callee);
// Since the call site will no longer be direct, we must clear metadata that
// is only appropriate for indirect calls. This includes !prof and !callees
// metadata.
- CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
- CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
// If the function type of the call site matches that of the callee, no
// additional work is required.
- if (CS.getFunctionType() == Callee->getFunctionType())
- return CS.getInstruction();
+ if (CB.getFunctionType() == Callee->getFunctionType())
+ return CB;
// Save the return types of the call site and callee.
- Type *CallSiteRetTy = CS.getInstruction()->getType();
+ Type *CallSiteRetTy = CB.getType();
Type *CalleeRetTy = Callee->getReturnType();
// Change the function type of the call site to match that of the callee.
- CS.mutateFunctionType(Callee->getFunctionType());
+ CB.mutateFunctionType(Callee->getFunctionType());
// Inspect the arguments of the call site. If an argument's type doesn't
// match the corresponding formal argument's type in the callee, bitcast it
@@ -395,19 +474,18 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
auto CalleeParamNum = CalleeType->getNumParams();
LLVMContext &Ctx = Callee->getContext();
- const AttributeList &CallerPAL = CS.getAttributes();
+ const AttributeList &CallerPAL = CB.getAttributes();
// The new list of argument attributes.
SmallVector<AttributeSet, 4> NewArgAttrs;
bool AttributeChanged = false;
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
- auto *Arg = CS.getArgument(ArgNo);
+ auto *Arg = CB.getArgOperand(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
- auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "",
- CS.getInstruction());
- CS.setArgument(ArgNo, Cast);
+ auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "", &CB);
+ CB.setArgOperand(ArgNo, Cast);
// Remove any incompatible attributes for the argument.
AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
@@ -432,30 +510,89 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// Remove any incompatible return value attribute.
AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
- createRetBitCast(CS, CallSiteRetTy, RetBitCast);
+ createRetBitCast(CB, CallSiteRetTy, RetBitCast);
RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
AttributeChanged = true;
}
// Set the new callsite attribute.
if (AttributeChanged)
- CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
AttributeSet::get(Ctx, RAttrs),
NewArgAttrs));
- return CS.getInstruction();
+ return CB;
}
-Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
- MDNode *BranchWeights) {
+CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
+ MDNode *BranchWeights) {
// Version the indirect call site. If the called value is equal to the given
// callee, 'NewInst' will be executed, otherwise the original call site will
// be executed.
- Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights);
+ CallBase &NewInst = versionCallSite(CB, Callee, BranchWeights);
// Promote 'NewInst' so that it directly calls the desired function.
- return promoteCall(CallSite(NewInst), Callee);
+ return promoteCall(NewInst, Callee);
+}
+
+bool llvm::tryPromoteCall(CallBase &CB) {
+ assert(!CB.getCalledFunction());
+ Module *M = CB.getCaller()->getParent();
+ const DataLayout &DL = M->getDataLayout();
+ Value *Callee = CB.getCalledOperand();
+
+ LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Callee);
+ if (!VTableEntryLoad)
+ return false; // Not a vtable entry load.
+ Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand();
+ APInt VTableOffset(DL.getTypeSizeInBits(VTableEntryPtr->getType()), 0);
+ Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets(
+ DL, VTableOffset, /* AllowNonInbounds */ true);
+ LoadInst *VTablePtrLoad = dyn_cast<LoadInst>(VTableBasePtr);
+ if (!VTablePtrLoad)
+ return false; // Not a vtable load.
+ Value *Object = VTablePtrLoad->getPointerOperand();
+ APInt ObjectOffset(DL.getTypeSizeInBits(Object->getType()), 0);
+ Value *ObjectBase = Object->stripAndAccumulateConstantOffsets(
+ DL, ObjectOffset, /* AllowNonInbounds */ true);
+ if (!(isa<AllocaInst>(ObjectBase) && ObjectOffset == 0))
+ // Not an Alloca or the offset isn't zero.
+ return false;
+
+ // Look for the vtable pointer store into the object by the ctor.
+ BasicBlock::iterator BBI(VTablePtrLoad);
+ Value *VTablePtr = FindAvailableLoadedValue(
+ VTablePtrLoad, VTablePtrLoad->getParent(), BBI, 0, nullptr, nullptr);
+ if (!VTablePtr)
+ return false; // No vtable found.
+ APInt VTableOffsetGVBase(DL.getTypeSizeInBits(VTablePtr->getType()), 0);
+ Value *VTableGVBase = VTablePtr->stripAndAccumulateConstantOffsets(
+ DL, VTableOffsetGVBase, /* AllowNonInbounds */ true);
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(VTableGVBase);
+ if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer()))
+ // Not in the form of a global constant variable with an initializer.
+ return false;
+
+ Constant *VTableGVInitializer = GV->getInitializer();
+ APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset;
+ if (!(VTableGVOffset.getActiveBits() <= 64))
+ return false; // Out of range.
+ Constant *Ptr = getPointerAtOffset(VTableGVInitializer,
+ VTableGVOffset.getZExtValue(),
+ *M);
+ if (!Ptr)
+ return false; // No constant (function) pointer found.
+ Function *DirectCallee = dyn_cast<Function>(Ptr->stripPointerCasts());
+ if (!DirectCallee)
+ return false; // No function pointer found.
+
+ if (!isLegalToPromote(CB, DirectCallee))
+ return false;
+
+ // Success.
+ promoteCall(CB, DirectCallee);
+ return true;
}
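// Illustrative C++ source pattern (an assumption about typical input, not
// part of this file) that lowers to the IR shape tryPromoteCall matches:
// an alloca'd object whose vtable-pointer store from the constructor is
// still visible at the indirect call site.
struct Base {
  virtual int run() { return 0; }
};
int caller() {
  Base B;         // alloca; the ctor stores Base's vtable pointer into it
  return B.run(); // vtable load + entry load + indirect call -> promotable
}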
#undef DEBUG_TYPE
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
new file mode 100644
index 0000000000000..1ae17c64b8f6d
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -0,0 +1,250 @@
+//==- CanonicalizeFreezeInLoops - Canonicalize freezes in a loop-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass canonicalizes freeze instructions in a loop by pushing them out to
+// the preheader.
+//
+// loop:
+// i = phi init, i.next
+// i.next = add nsw i, 1
+// i.next.fr = freeze i.next // push this out of this loop
+// use(i.next.fr)
+// br i1 (i.next <= N), loop, exit
+// =>
+// init.fr = freeze init
+// loop:
+// i = phi init.fr, i.next
+// i.next = add i, 1 // nsw is dropped here
+// use(i.next)
+// br i1 (i.next <= N), loop, exit
+//
+// Removing freezes from these chains helps scalar evolution successfully analyze
+// expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "canon-freeze"
+
+namespace {
+
+class CanonicalizeFreezeInLoops : public LoopPass {
+public:
+ static char ID;
+
+ CanonicalizeFreezeInLoops();
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+class CanonicalizeFreezeInLoopsImpl {
+ Loop *L;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+ struct FrozenIndPHIInfo {
+ // A freeze instruction that uses an induction phi
+ FreezeInst *FI = nullptr;
+ // The induction phi, the step instruction, and the operand index of
+ // StepInst that holds the step value
+ PHINode *PHI;
+ BinaryOperator *StepInst;
+ unsigned StepValIdx = 0;
+
+ FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
+ : PHI(PHI), StepInst(StepInst) {}
+ };
+
+ // Can a freeze instruction be pushed into the operands of I?
+ // For this to be legal, I must not create poison after its flags are
+ // stripped.
+ bool canHandleInst(const Instruction *I) {
+ auto Opc = I->getOpcode();
+ // If add/sub/mul, drop nsw/nuw flags.
+ return Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul;
+ }
+
+ void InsertFreezeAndForgetFromSCEV(Use &U);
+
+public:
+ CanonicalizeFreezeInLoopsImpl(Loop *L, ScalarEvolution &SE, DominatorTree &DT)
+ : L(L), SE(SE), DT(DT) {}
+ bool run();
+};
+
+} // anonymous namespace
+
+// Given U = (value, user), replace value with freeze(value), and let
+// SCEV forget user. The inserted freeze is placed in the preheader.
+void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
+ auto *PH = L->getLoopPreheader();
+
+ auto *UserI = cast<Instruction>(U.getUser());
+ auto *ValueToFr = U.get();
+ assert(L->contains(UserI->getParent()) &&
+ "Should not process an instruction that isn't inside the loop");
+ if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, UserI, &DT))
+ return;
+
+ LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n");
+ LLVM_DEBUG(dbgs() << "\tUser: " << *U.getUser() << "\n");
+ LLVM_DEBUG(dbgs() << "\tOperand: " << *U.get() << "\n");
+
+ U.set(new FreezeInst(ValueToFr, ValueToFr->getName() + ".frozen",
+ PH->getTerminator()));
+
+ SE.forgetValue(UserI);
+}
+
+bool CanonicalizeFreezeInLoopsImpl::run() {
+ // The loop should be in LoopSimplify form.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ SmallVector<FrozenIndPHIInfo, 4> Candidates;
+
+ for (auto &PHI : L->getHeader()->phis()) {
+ InductionDescriptor ID;
+ if (!InductionDescriptor::isInductionPHI(&PHI, L, &SE, ID))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "canonfr: PHI: " << PHI << "\n");
+ FrozenIndPHIInfo Info(&PHI, ID.getInductionBinOp());
+ if (!Info.StepInst || !canHandleInst(Info.StepInst)) {
+ // The stepping instruction has unknown form.
+ // Ignore this PHI.
+ continue;
+ }
+
+ Info.StepValIdx = Info.StepInst->getOperand(0) == &PHI;
+ Value *StepV = Info.StepInst->getOperand(Info.StepValIdx);
+ if (auto *StepI = dyn_cast<Instruction>(StepV)) {
+ if (L->contains(StepI->getParent())) {
+ // The step value is inside the loop. Freezing the step value would introduce
+ // another freeze into the loop, so skip this PHI.
+ continue;
+ }
+ }
+
+ auto Visit = [&](User *U) {
+ if (auto *FI = dyn_cast<FreezeInst>(U)) {
+ LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n");
+ Info.FI = FI;
+ Candidates.push_back(Info);
+ }
+ };
+ for_each(PHI.users(), Visit);
+ for_each(Info.StepInst->users(), Visit);
+ }
+
+ if (Candidates.empty())
+ return false;
+
+ SmallSet<PHINode *, 8> ProcessedPHIs;
+ for (const auto &Info : Candidates) {
+ PHINode *PHI = Info.PHI;
+ if (!ProcessedPHIs.insert(Info.PHI).second)
+ continue;
+
+ BinaryOperator *StepI = Info.StepInst;
+ assert(StepI && "Step instruction should have been found");
+
+ // Drop flags from the step instruction.
+ if (!isGuaranteedNotToBeUndefOrPoison(StepI, StepI, &DT)) {
+ LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n");
+ StepI->dropPoisonGeneratingFlags();
+ SE.forgetValue(StepI);
+ }
+
+ InsertFreezeAndForgetFromSCEV(StepI->getOperandUse(Info.StepValIdx));
+
+ unsigned OperandIdx =
+ PHI->getOperandNumForIncomingValue(PHI->getIncomingValue(0) == StepI);
+ InsertFreezeAndForgetFromSCEV(PHI->getOperandUse(OperandIdx));
+ }
+
+ // Finally, remove the old freeze instructions.
+ for (const auto &Item : Candidates) {
+ auto *FI = Item.FI;
+ LLVM_DEBUG(dbgs() << "canonfr: removing " << *FI << "\n");
+ SE.forgetValue(FI);
+ FI->replaceAllUsesWith(FI->getOperand(0));
+ FI->eraseFromParent();
+ }
+
+ return true;
+}
+
+CanonicalizeFreezeInLoops::CanonicalizeFreezeInLoops() : LoopPass(ID) {
+ initializeCanonicalizeFreezeInLoopsPass(*PassRegistry::getPassRegistry());
+}
+
+void CanonicalizeFreezeInLoops::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool CanonicalizeFreezeInLoops::runOnLoop(Loop *L, LPPassManager &) {
+ if (skipLoop(L))
+ return false;
+
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return CanonicalizeFreezeInLoopsImpl(L, SE, DT).run();
+}
+
+PreservedAnalyses
+CanonicalizeFreezeInLoopsPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ if (!CanonicalizeFreezeInLoopsImpl(&L, AR.SE, AR.DT).run())
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+INITIALIZE_PASS_BEGIN(CanonicalizeFreezeInLoops, "canon-freeze",
+ "Canonicalize Freeze Instructions in Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(CanonicalizeFreezeInLoops, "canon-freeze",
+ "Canonicalize Freeze Instructions in Loops", false, false)
+
+Pass *llvm::createCanonicalizeFreezeInLoopsPass() {
+ return new CanonicalizeFreezeInLoops();
+}
+
+char CanonicalizeFreezeInLoops::ID = 0;
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 75e8963303c24..788983c156903 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -46,7 +46,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
if (BB->hasName())
NewBB->setName(BB->getName() + NameSuffix);
- bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+ bool hasCalls = false, hasDynamicAllocas = false;
Module *TheModule = F ? F->getParent() : nullptr;
// Loop over all instructions, and copy them over.
@@ -62,18 +62,15 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
- if (isa<ConstantInt>(AI->getArraySize()))
- hasStaticAllocas = true;
- else
+ if (!AI->isStaticAlloca()) {
hasDynamicAllocas = true;
+ }
}
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
- BB != &BB->getParent()->getEntryBlock();
}
return NewBB;
}
@@ -367,8 +364,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (CodeInfo)
- if (auto CS = ImmutableCallSite(&*II))
- if (CS.hasOperandBundles())
+ if (auto *CB = dyn_cast<CallBase>(&*II))
+ if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -424,8 +421,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo)
- if (auto CS = ImmutableCallSite(OldTI))
- if (CS.hasOperandBundles())
+ if (auto *CB = dyn_cast<CallBase>(OldTI))
+ if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
@@ -619,8 +616,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Skip over non-intrinsic callsites; we don't want to remove any nodes from
// the CGSCC.
- CallSite CS = CallSite(I);
- if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
+ CallBase *CB = dyn_cast<CallBase>(I);
+ if (CB && CB->getCalledFunction() &&
+ !CB->getCalledFunction()->isIntrinsic())
continue;
// See if this instruction simplifies.
@@ -804,8 +802,6 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
- if (BB == CurLoop->getHeader())
- NewLoop->moveToHeader(NewBB);
// Add DominatorTree node. After seeing all blocks, update to correct
// IDom.
@@ -815,6 +811,11 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
}
for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ // Update loop headers.
+ Loop *CurLoop = LI->getLoopFor(BB);
+ if (BB == CurLoop->getHeader())
+ LMap[CurLoop]->moveToHeader(cast<BasicBlock>(VMap[BB]));
+
// Update DominatorTree.
BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 682af4a88d3e5..8cdbb9d356523 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -31,11 +31,14 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -448,18 +451,24 @@ CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
for (User *U : Addr->users()) {
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
if (IntrInst) {
+ // We don't model addresses with multiple start/end markers, but the
+ // markers do not need to be in the region.
if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
- // Do not handle the case where Addr has multiple start markers.
if (Info.LifeStart)
return {};
Info.LifeStart = IntrInst;
+ continue;
}
if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
if (Info.LifeEnd)
return {};
Info.LifeEnd = IntrInst;
+ continue;
}
- continue;
+ // At this point, permit debug uses outside of the region.
+ // This is fixed in a later call to fixupDebugInfoPostExtraction().
+ if (isa<DbgInfoIntrinsic>(IntrInst))
+ continue;
}
// Find untracked uses of the address, bail.
if (!definedInRegion(Blocks, U))
@@ -865,10 +874,13 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NoAlias:
case Attribute::NoBuiltin:
case Attribute::NoCapture:
+ case Attribute::NoMerge:
case Attribute::NoReturn:
case Attribute::NoSync:
+ case Attribute::NoUndef:
case Attribute::None:
case Attribute::NonNull:
+ case Attribute::Preallocated:
case Attribute::ReadNone:
case Attribute::ReadOnly:
case Attribute::Returned:
@@ -884,6 +896,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::ZExt:
case Attribute::ImmArg:
case Attribute::EndAttrKinds:
+ case Attribute::EmptyKey:
+ case Attribute::TombstoneKey:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -898,6 +912,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NonLazyBind:
case Attribute::NoRedZone:
case Attribute::NoUnwind:
+ case Attribute::NullPointerIsValid:
case Attribute::OptForFuzzing:
case Attribute::OptimizeNone:
case Attribute::OptimizeForSize:
@@ -1120,8 +1135,7 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
- StoreInst *SI = new StoreInst(StructValues[i], GEP);
- codeReplacer->getInstList().push_back(SI);
+ new StoreInst(StructValues[i], GEP, codeReplacer);
}
}
@@ -1164,9 +1178,9 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
Output = ReloadOutputs[i];
}
LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
- outputs[i]->getName() + ".reload");
+ outputs[i]->getName() + ".reload",
+ codeReplacer);
Reloads.push_back(load);
- codeReplacer->getInstList().push_back(load);
std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
for (unsigned u = 0, e = Users.size(); u != e; ++u) {
Instruction *inst = cast<Instruction>(Users[u]);
@@ -1351,6 +1365,9 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
// Block Frequency distribution with dummy node.
Distribution BranchDist;
+ SmallVector<BranchProbability, 4> EdgeProbabilities(
+ TI->getNumSuccessors(), BranchProbability::getUnknown());
+
// Add each of the frequencies of the successors.
for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
BlockNode ExitNode(i);
@@ -1358,12 +1375,14 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
if (ExitFreq != 0)
BranchDist.addExit(ExitNode, ExitFreq);
else
- BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero());
+ EdgeProbabilities[i] = BranchProbability::getZero();
}
// Check for no total weight.
- if (BranchDist.Total == 0)
+ if (BranchDist.Total == 0) {
+ BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
return;
+ }
// Normalize the distribution so that they can fit in unsigned.
BranchDist.normalize();
@@ -1375,13 +1394,133 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
// Get the weight and update the current BFI.
BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
BranchProbability BP(Weight.Amount, BranchDist.Total);
- BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP);
+ EdgeProbabilities[Weight.TargetNode.Index] = BP;
}
+ BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
TI->setMetadata(
LLVMContext::MD_prof,
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
+/// Erase debug info intrinsics that refer to values in \p F but are not
+/// themselves contained in \p F.
+static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
+ for (Instruction &I : instructions(F)) {
+ SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (DbgVariableIntrinsic *DVI : DbgUsers)
+ if (DVI->getFunction() != &F)
+ DVI->eraseFromParent();
+ }
+}
+
+/// Fix up the debug info in the old and new functions by pointing line
+/// locations and debug intrinsics to the new subprogram scope, and by deleting
+/// intrinsics which point to values outside of the new function.
+static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
+ CallInst &TheCall) {
+ DISubprogram *OldSP = OldFunc.getSubprogram();
+ LLVMContext &Ctx = OldFunc.getContext();
+
+ if (!OldSP) {
+ // Erase any debug info the new function contains.
+ stripDebugInfo(NewFunc);
+ // Make sure the old function doesn't contain any non-local metadata refs.
+ eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
+ return;
+ }
+
+ // Create a subprogram for the new function. Leave out a description of the
+ // function arguments, as the parameters don't correspond to anything at the
+ // source level.
+ assert(OldSP->getUnit() && "Missing compile unit for subprogram");
+ DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolvedNodes=*/false,
+ OldSP->getUnit());
+ auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
+ DISubprogram::SPFlagOptimized |
+ DISubprogram::SPFlagLocalToUnit;
+ auto NewSP = DIB.createFunction(
+ OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
+ /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
+ NewFunc.setSubprogram(NewSP);
+
+ // Debug intrinsics in the new function need to be updated in one of two
+ // ways:
+ // 1) They need to be deleted, because they describe a value in the old
+ // function.
+ // 2) They need to point to fresh metadata, e.g. because they currently
+ // point to a variable in the wrong scope.
+ SmallDenseMap<DINode *, DINode *> RemappedMetadata;
+ SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
+ for (Instruction &I : instructions(NewFunc)) {
+ auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
+ if (!DII)
+ continue;
+
+ // Point the intrinsic to a fresh label within the new function.
+ if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
+ DILabel *OldLabel = DLI->getLabel();
+ DINode *&NewLabel = RemappedMetadata[OldLabel];
+ if (!NewLabel)
+ NewLabel = DILabel::get(Ctx, NewSP, OldLabel->getName(),
+ OldLabel->getFile(), OldLabel->getLine());
+ DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel));
+ continue;
+ }
+
+ // If the location isn't a constant or an instruction, delete the
+ // intrinsic.
+ auto *DVI = cast<DbgVariableIntrinsic>(DII);
+ Value *Location = DVI->getVariableLocation();
+ if (!Location ||
+ (!isa<Constant>(Location) && !isa<Instruction>(Location))) {
+ DebugIntrinsicsToDelete.push_back(DVI);
+ continue;
+ }
+
+ // If the variable location is an instruction but isn't in the new
+ // function, delete the intrinsic.
+ Instruction *LocationInst = dyn_cast<Instruction>(Location);
+ if (LocationInst && LocationInst->getFunction() != &NewFunc) {
+ DebugIntrinsicsToDelete.push_back(DVI);
+ continue;
+ }
+
+ // Point the intrinsic to a fresh variable within the new function.
+ DILocalVariable *OldVar = DVI->getVariable();
+ DINode *&NewVar = RemappedMetadata[OldVar];
+ if (!NewVar)
+ NewVar = DIB.createAutoVariable(
+ NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
+ OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
+ OldVar->getAlignInBits());
+ DVI->setArgOperand(1, MetadataAsValue::get(Ctx, NewVar));
+ }
+ for (auto *DII : DebugIntrinsicsToDelete)
+ DII->eraseFromParent();
+ DIB.finalizeSubprogram(NewSP);
+
+ // Fix up the scope information attached to the line locations in the new
+ // function.
+ for (Instruction &I : instructions(NewFunc)) {
+ if (const DebugLoc &DL = I.getDebugLoc())
+ I.setDebugLoc(DebugLoc::get(DL.getLine(), DL.getCol(), NewSP));
+
+ // Loop info metadata may contain line locations. Fix them up.
+ auto updateLoopInfoLoc = [&Ctx,
+ NewSP](const DILocation &Loc) -> DILocation * {
+ return DILocation::get(Ctx, Loc.getLine(), Loc.getColumn(), NewSP,
+ nullptr);
+ };
+ updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
+ }
+ if (!TheCall.getDebugLoc())
+ TheCall.setDebugLoc(DebugLoc::get(0, 0, OldSP));
+
+ eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
+}
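// A minimal standalone sketch of the triage performed above, using
// hypothetical structs in place of LLVM's debug metadata: a record is dropped
// when the value it describes stays behind in the old function, and otherwise
// its scope is remapped to the new subprogram.
#include <iostream>
#include <string>
#include <vector>

struct DbgRecord {
  std::string Var;
  bool ValueInNewFunc; // does the described value live in the new function?
  int Scope;           // subprogram the record currently points at
};

int main() {
  const int NewSP = 2; // hypothetical ID for the freshly created subprogram
  std::vector<DbgRecord> Records = {
      {"x", true, 1}, {"y", false, 1}, {"z", true, 1}};
  std::vector<DbgRecord> Kept;
  for (DbgRecord R : Records) {
    if (!R.ValueInNewFunc)
      continue;      // delete: refers to a value outside the new function
    R.Scope = NewSP; // remap: point at the new subprogram scope
    Kept.push_back(R);
  }
  for (const DbgRecord &R : Kept)
    std::cout << R.Var << " -> scope " << R.Scope << "\n"; // x and z survive
}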
+
Function *
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
if (!isEligible())
@@ -1405,13 +1544,19 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
}
- if (AC) {
- // Remove @llvm.assume calls that were moved to the new function from the
- // old function's assumption cache.
- for (BasicBlock *Block : Blocks)
- for (auto &I : *Block)
- if (match(&I, m_Intrinsic<Intrinsic::assume>()))
- AC->unregisterAssumption(cast<CallInst>(&I));
+ // Erase @llvm.assume calls from the blocks being extracted, unregistering
+ // them from the old function's assumption cache.
+ for (BasicBlock *Block : Blocks) {
+ for (auto It = Block->begin(), End = Block->end(); It != End;) {
+ Instruction *I = &*It;
+ ++It;
+
+ if (match(I, m_Intrinsic<Intrinsic::assume>())) {
+ if (AC)
+ AC->unregisterAssumption(cast<CallInst>(I));
+ I->eraseFromParent();
+ }
+ }
}
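// A minimal standalone sketch of the advance-then-erase idiom used in the
// loop above, on a std::list instead of a basic block: the iterator is moved
// past the current element before the element is erased, so erasure never
// invalidates the iterator still in use.
#include <iostream>
#include <list>

int main() {
  std::list<int> Insts = {1, 2, 3, 4};
  for (auto It = Insts.begin(), End = Insts.end(); It != End;) {
    auto Cur = It++; // advance first, keep a handle to the current element
    if (*Cur % 2 == 0)
      Insts.erase(Cur); // safe: It already points past the erased element
  }
  for (int I : Insts)
    std::cout << I << "\n"; // 1, 3
}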
// If we have any return instructions in the region, split those blocks so
@@ -1567,26 +1712,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
}
- // Erase debug info intrinsics. Variable updates within the new function are
- // invisible to debuggers. This could be improved by defining a DISubprogram
- // for the new function.
- for (BasicBlock &BB : *newFunction) {
- auto BlockIt = BB.begin();
- // Remove debug info intrinsics from the new function.
- while (BlockIt != BB.end()) {
- Instruction *Inst = &*BlockIt;
- ++BlockIt;
- if (isa<DbgInfoIntrinsic>(Inst))
- Inst->eraseFromParent();
- }
- // Remove debug info intrinsics which refer to values in the new function
- // from the old function.
- SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
- for (Instruction &I : BB)
- findDbgUsers(DbgUsers, &I);
- for (DbgVariableIntrinsic *DVI : DbgUsers)
- DVI->eraseFromParent();
- }
+ fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
// Mark the new function `noreturn` if applicable. Terminators which resume
// exception propagation are treated as returning instructions. This is to
@@ -1604,17 +1730,36 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
});
LLVM_DEBUG(if (verifyFunction(*oldFunction))
report_fatal_error("verification of oldFunction failed!"));
- LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC))
- report_fatal_error("Stale Asumption cache for old Function!"));
+ LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
+ report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
}
-bool CodeExtractor::verifyAssumptionCache(const Function& F,
+bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
+ const Function &NewFunc,
AssumptionCache *AC) {
for (auto AssumeVH : AC->assumptions()) {
- CallInst *I = cast<CallInst>(AssumeVH);
- if (I->getFunction() != &F)
+ CallInst *I = dyn_cast_or_null<CallInst>(AssumeVH);
+ if (!I)
+ continue;
+
+ // There shouldn't be any llvm.assume intrinsics in the new function.
+ if (I->getFunction() != &OldFunc)
return true;
+
+ // There shouldn't be any stale affected values in the assumption cache
+ // that were previously in the old function, but that have now been moved
+ // to the new function.
+ for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
+ CallInst *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
+ if (!AffectedCI)
+ continue;
+ if (AffectedCI->getFunction() != &OldFunc)
+ return true;
+ auto *AssumedInst = dyn_cast<Instruction>(AffectedCI->getOperand(0));
+ if (AssumedInst && AssumedInst->getFunction() != &OldFunc)
+ return true;
+ }
}
return false;
}
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 93395ac761ab5..08047dc0f96ee 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
@@ -30,6 +31,201 @@ STATISTIC(NotControlFlowEquivalent,
STATISTIC(NotMovedPHINode, "Movement of PHINodes is not supported");
STATISTIC(NotMovedTerminator, "Movement of terminators is not supported");
+namespace {
+/// Represent a control condition. A control condition is a condition of a
+/// terminator that decides which successors to execute. The pointer field
+/// holds the condition value of the terminator. The integer field is a bool:
+/// it is true when the basic block executes if the condition is true. For
+/// example, given `br %cond, bb0, bb1`, %cond is a control condition of bb0
+/// with the integer field equal to true, and a control condition of bb1 with
+/// the integer field equal to false.
+using ControlCondition = PointerIntPair<Value *, 1, bool>;
+#ifndef NDEBUG
+raw_ostream &operator<<(raw_ostream &OS, const ControlCondition &C) {
+ OS << "[" << *C.getPointer() << ", " << (C.getInt() ? "true" : "false")
+ << "]";
+ return OS;
+}
+#endif
+
+/// Represent a set of control conditions required to execute ToBB from FromBB.
+class ControlConditions {
+ using ConditionVectorTy = SmallVector<ControlCondition, 6>;
+
+ /// A SmallVector of control conditions.
+ ConditionVectorTy Conditions;
+
+public:
+ /// Return a ControlConditions which stores all conditions required to execute
+ /// \p BB from \p Dominator. If \p MaxLookup is non-zero, it limits the
+ /// number of conditions to collect. Return None if not all conditions are
+ /// collected successfully, or we hit the limit.
+ static const Optional<ControlConditions>
+ collectControlConditions(const BasicBlock &BB, const BasicBlock &Dominator,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ unsigned MaxLookup = 6);
+
+ /// Return true if no control conditions are required to execute ToBB from
+ /// FromBB.
+ bool isUnconditional() const { return Conditions.empty(); }
+
+ /// Return a constant reference of Conditions.
+ const ConditionVectorTy &getControlConditions() const { return Conditions; }
+
+ /// Add \p C to Conditions if an equivalent condition does not already
+ /// exist. Return true if \p C was inserted.
+ bool addControlCondition(ControlCondition C);
+
+ /// Return true if for all control conditions in Conditions, there exists an
+ /// equivalent control condition in \p Other.Conditions.
+ bool isEquivalent(const ControlConditions &Other) const;
+
+ /// Return true if \p C1 and \p C2 are equivalent.
+ static bool isEquivalent(const ControlCondition &C1,
+ const ControlCondition &C2);
+
+private:
+ ControlConditions() = default;
+
+ static bool isEquivalent(const Value &V1, const Value &V2);
+ static bool isInverse(const Value &V1, const Value &V2);
+};
+} // namespace
+
+static bool domTreeLevelBefore(DominatorTree *DT, const Instruction *InstA,
+ const Instruction *InstB) {
+ // If the two instructions are in the same block, compare their relative
+ // order within that block.
+ if (InstA->getParent() == InstB->getParent())
+ return InstA->comesBefore(InstB);
+
+ DomTreeNode *DA = DT->getNode(InstA->getParent());
+ DomTreeNode *DB = DT->getNode(InstB->getParent());
+ return DA->getLevel() < DB->getLevel();
+}
+
+const Optional<ControlConditions> ControlConditions::collectControlConditions(
+ const BasicBlock &BB, const BasicBlock &Dominator, const DominatorTree &DT,
+ const PostDominatorTree &PDT, unsigned MaxLookup) {
+ assert(DT.dominates(&Dominator, &BB) && "Expecting Dominator to dominate BB");
+
+ ControlConditions Conditions;
+ unsigned NumConditions = 0;
+
+ // BB is executed unconditionally from itself.
+ if (&Dominator == &BB)
+ return Conditions;
+
+ const BasicBlock *CurBlock = &BB;
+ // Walk up the dominator tree from the associated DT node for BB to the
+ // associated DT node for Dominator.
+ do {
+ assert(DT.getNode(CurBlock) && "Expecting a valid DT node for CurBlock");
+ BasicBlock *IDom = DT.getNode(CurBlock)->getIDom()->getBlock();
+ assert(DT.dominates(&Dominator, IDom) &&
+ "Expecting Dominator to dominate IDom");
+
+ // Limitation: only branch instructions are currently handled.
+ const BranchInst *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ if (!BI)
+ return None;
+
+ bool Inserted = false;
+ if (PDT.dominates(CurBlock, IDom)) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName()
+ << " is executed unconditionally from "
+ << IDom->getName() << "\n");
+ } else if (PDT.dominates(CurBlock, BI->getSuccessor(0))) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \""
+ << *BI->getCondition() << "\" is true from "
+ << IDom->getName() << "\n");
+ Inserted = Conditions.addControlCondition(
+ ControlCondition(BI->getCondition(), true));
+ } else if (PDT.dominates(CurBlock, BI->getSuccessor(1))) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \""
+ << *BI->getCondition() << "\" is false from "
+ << IDom->getName() << "\n");
+ Inserted = Conditions.addControlCondition(
+ ControlCondition(BI->getCondition(), false));
+ } else
+ return None;
+
+ if (Inserted)
+ ++NumConditions;
+
+ if (MaxLookup != 0 && NumConditions > MaxLookup)
+ return None;
+
+ CurBlock = IDom;
+ } while (CurBlock != &Dominator);
+
+ return Conditions;
+}
+
+bool ControlConditions::addControlCondition(ControlCondition C) {
+ bool Inserted = false;
+ if (none_of(Conditions, [&](ControlCondition &Exists) {
+ return ControlConditions::isEquivalent(C, Exists);
+ })) {
+ Conditions.push_back(C);
+ Inserted = true;
+ }
+
+ LLVM_DEBUG(dbgs() << (Inserted ? "Inserted " : "Not inserted ") << C << "\n");
+ return Inserted;
+}
+
+bool ControlConditions::isEquivalent(const ControlConditions &Other) const {
+ if (Conditions.empty() && Other.Conditions.empty())
+ return true;
+
+ if (Conditions.size() != Other.Conditions.size())
+ return false;
+
+ return all_of(Conditions, [&](const ControlCondition &C) {
+ return any_of(Other.Conditions, [&](const ControlCondition &OtherC) {
+ return ControlConditions::isEquivalent(C, OtherC);
+ });
+ });
+}
+
+bool ControlConditions::isEquivalent(const ControlCondition &C1,
+ const ControlCondition &C2) {
+ if (C1.getInt() == C2.getInt()) {
+ if (isEquivalent(*C1.getPointer(), *C2.getPointer()))
+ return true;
+ } else if (isInverse(*C1.getPointer(), *C2.getPointer()))
+ return true;
+
+ return false;
+}
+
+// FIXME: Use SCEV and reuse GVN/CSE logic to check for equivalence between
+// Values.
+// Currently, isEquivalent relies on other passes, e.g. GVN, to ensure that
+// equivalent conditions have the same value.
+bool ControlConditions::isEquivalent(const Value &V1, const Value &V2) {
+ return &V1 == &V2;
+}
+
+bool ControlConditions::isInverse(const Value &V1, const Value &V2) {
+ if (const CmpInst *Cmp1 = dyn_cast<CmpInst>(&V1))
+ if (const CmpInst *Cmp2 = dyn_cast<CmpInst>(&V2)) {
+ if (Cmp1->getPredicate() == Cmp2->getInversePredicate() &&
+ Cmp1->getOperand(0) == Cmp2->getOperand(0) &&
+ Cmp1->getOperand(1) == Cmp2->getOperand(1))
+ return true;
+
+ if (Cmp1->getPredicate() ==
+ CmpInst::getSwappedPredicate(Cmp2->getInversePredicate()) &&
+ Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
+ Cmp1->getOperand(1) == Cmp2->getOperand(0))
+ return true;
+ }
+ return false;
+}
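// A minimal standalone sketch of the inverse-compare check above, using a
// tiny predicate enum as a stand-in for llvm::CmpInst predicates: two
// compares are inverses if one carries the inverse predicate over the same
// operands, or the swapped inverse predicate over swapped operands.
#include <cassert>

enum Pred { SLT, SLE, SGT, SGE };

Pred inverse(Pred P) { // stand-in for CmpInst::getInversePredicate
  switch (P) {
  case SLT: return SGE;
  case SGE: return SLT;
  case SLE: return SGT;
  case SGT: return SLE;
  }
  return P;
}

Pred swapped(Pred P) { // stand-in for CmpInst::getSwappedPredicate
  switch (P) {
  case SLT: return SGT;
  case SGT: return SLT;
  case SLE: return SGE;
  case SGE: return SLE;
  }
  return P;
}

struct Cmp { Pred P; int LHS, RHS; }; // operands identified by value number

bool isInverse(const Cmp &C1, const Cmp &C2) {
  if (C1.P == inverse(C2.P) && C1.LHS == C2.LHS && C1.RHS == C2.RHS)
    return true;
  return C1.P == swapped(inverse(C2.P)) && C1.LHS == C2.RHS &&
         C1.RHS == C2.LHS;
}

int main() {
  Cmp A{SLT, 0, 1};                  // a < b
  assert(isInverse(A, {SGE, 0, 1})); // a >= b: inverse predicate
  assert(isInverse(A, {SLE, 1, 0})); // b <= a: swapped inverse predicate
  assert(!isInverse(A, {SLT, 0, 1}));
}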
+
bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
const DominatorTree &DT,
const PostDominatorTree &PDT) {
@@ -42,8 +238,30 @@ bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
if (&BB0 == &BB1)
return true;
- return ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
- (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)));
+ if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
+ (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)))
+ return true;
+
+ // If the set of conditions required to execute BB0 and BB1 from their common
+ // dominator are the same, then BB0 and BB1 are control flow equivalent.
+ const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1);
+ LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName()
+ << " and " << BB1.getName() << " is "
+ << CommonDominator->getName() << "\n");
+
+ const Optional<ControlConditions> BB0Conditions =
+ ControlConditions::collectControlConditions(BB0, *CommonDominator, DT,
+ PDT);
+ if (BB0Conditions == None)
+ return false;
+
+ const Optional<ControlConditions> BB1Conditions =
+ ControlConditions::collectControlConditions(BB1, *CommonDominator, DT,
+ PDT);
+ if (BB1Conditions == None)
+ return false;
+
+ return BB0Conditions->isEquivalent(*BB1Conditions);
}
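// A minimal standalone sketch of the new equivalence test, with plain
// (condition ID, taken-on-true) pairs standing in for ControlCondition: two
// blocks are control flow equivalent when the condition sets gathered from
// their nearest common dominator match pairwise. Identity comparison is used
// here; the real code additionally matches inverted compares.
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

using Cond = std::pair<int, bool>; // (condition value ID, taken-on-true?)

bool equivalentConditions(const std::vector<Cond> &A,
                          const std::vector<Cond> &B) {
  if (A.size() != B.size())
    return false;
  return std::all_of(A.begin(), A.end(), [&](const Cond &C) {
    return std::any_of(B.begin(), B.end(),
                       [&](const Cond &O) { return C == O; });
  });
}

int main() {
  std::vector<Cond> BB0 = {{1, true}}; // both guarded by %cond being true
  std::vector<Cond> BB1 = {{1, true}};
  std::cout << equivalentConditions(BB0, BB1) << "\n"; // 1
  BB1 = {{1, false}}; // opposite arms of the branch: not equivalent
  std::cout << equivalentConditions(BB0, BB1) << "\n"; // 0
}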
static bool reportInvalidCandidate(const Instruction &I,
@@ -90,9 +308,12 @@ collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
}
bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
- const DominatorTree &DT,
- const PostDominatorTree &PDT,
- DependenceInfo &DI) {
+ DominatorTree &DT, const PostDominatorTree *PDT,
+ DependenceInfo *DI) {
+ // Bail out when PDT or DI is not available.
+ if (!PDT || !DI)
+ return false;
+
// Cannot move itself before itself.
if (&I == &InsertPoint)
return false;
@@ -108,28 +329,22 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
return reportInvalidCandidate(I, NotMovedTerminator);
// TODO remove this limitation.
- if (!isControlFlowEquivalent(I, InsertPoint, DT, PDT))
+ if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
return reportInvalidCandidate(I, NotControlFlowEquivalent);
- // As I and InsertPoint are control flow equivalent, if I dominates
- // InsertPoint, then I comes before InsertPoint.
- const bool MoveForward = DT.dominates(&I, &InsertPoint);
- if (MoveForward) {
- // When I is being moved forward, we need to make sure the InsertPoint
- // dominates every users. Or else, a user may be using an undefined I.
+ if (!DT.dominates(&InsertPoint, &I))
for (const Use &U : I.uses())
if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
return false;
- } else {
- // When I is being moved backward, we need to make sure all its opernads
- // dominates the InsertPoint. Or else, an operand may be undefined for I.
+ if (!DT.dominates(&I, &InsertPoint))
for (const Value *Op : I.operands())
if (auto *OpInst = dyn_cast<Instruction>(Op))
if (&InsertPoint == OpInst || !DT.dominates(OpInst, &InsertPoint))
return false;
- }
+ DT.updateDFSNumbers();
+ const bool MoveForward = domTreeLevelBefore(&DT, &I, &InsertPoint);
Instruction &StartInst = (MoveForward ? I : InsertPoint);
Instruction &EndInst = (MoveForward ? InsertPoint : I);
SmallPtrSet<Instruction *, 10> InstsToCheck;
@@ -162,7 +377,7 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
// StartInst to \p EndInst.
if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
[&DI, &I](Instruction *CurInst) {
- auto DepResult = DI.depends(&I, CurInst, true);
+ auto DepResult = DI->depends(&I, CurInst, true);
if (DepResult &&
(DepResult->isOutput() || DepResult->isFlow() ||
DepResult->isAnti()))
@@ -174,16 +389,40 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
return true;
}
-void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
- const DominatorTree &DT,
- const PostDominatorTree &PDT, DependenceInfo &DI) {
+bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
+ DominatorTree &DT, const PostDominatorTree *PDT,
+ DependenceInfo *DI) {
+ return llvm::all_of(BB, [&](Instruction &I) {
+ if (BB.getTerminator() == &I)
+ return true;
+
+ return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI);
+ });
+}
+
+void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
+ DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ DependenceInfo &DI) {
for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
Instruction &I = *It;
// Increment the iterator before modifying FromBB.
++It;
- if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
+ if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
+ I.moveBefore(MovePos);
+ }
+}
+
+void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
+ DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ DependenceInfo &DI) {
+ Instruction *MovePos = ToBB.getTerminator();
+ while (FromBB.size() > 1) {
+ Instruction &I = FromBB.front();
+ // Stop at the first unmovable instruction; otherwise the loop would never
+ // terminate, since the front of FromBB would stay in place.
+ if (!isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
+ break;
+ I.moveBefore(MovePos);
}
}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index b7b4bfa3734d0..8f98d81a3d797 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -30,6 +30,17 @@ namespace {
cl::opt<bool> Quiet("debugify-quiet",
cl::desc("Suppress verbose debugify output"));
+enum class Level {
+ Locations,
+ LocationsAndVariables
+};
+cl::opt<Level> DebugifyLevel(
+ "debugify-level", cl::desc("Kind of debug info to add"),
+ cl::values(clEnumValN(Level::Locations, "locations", "Locations only"),
+ clEnumValN(Level::LocationsAndVariables, "location+variables",
+ "Locations and Variables")),
+ cl::init(Level::LocationsAndVariables));
+
raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
@@ -51,10 +62,11 @@ Instruction *findTerminatingInstruction(BasicBlock &BB) {
return I;
return BB.getTerminator();
}
+} // end anonymous namespace
-bool applyDebugifyMetadata(Module &M,
- iterator_range<Module::iterator> Functions,
- StringRef Banner) {
+bool llvm::applyDebugifyMetadata(
+ Module &M, iterator_range<Module::iterator> Functions, StringRef Banner,
+ std::function<bool(DIBuilder &DIB, Function &F)> ApplyToMF) {
// Skip modules with debug info.
if (M.getNamedMetadata("llvm.dbg.cu")) {
dbg() << Banner << "Skipping module with debug info\n";
@@ -63,6 +75,7 @@ bool applyDebugifyMetadata(Module &M,
DIBuilder DIB(M);
LLVMContext &Ctx = M.getContext();
+ auto *Int32Ty = Type::getInt32Ty(Ctx);
// Get a DIType which corresponds to Ty.
DenseMap<uint64_t, DIType *> TypeCache;
@@ -87,6 +100,7 @@ bool applyDebugifyMetadata(Module &M,
if (isFunctionSkipped(F))
continue;
+ bool InsertedDbgVal = false;
auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
DISubprogram::DISPFlags SPFlags =
DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
@@ -95,11 +109,31 @@ bool applyDebugifyMetadata(Module &M,
auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
SPType, NextLine, DINode::FlagZero, SPFlags);
F.setSubprogram(SP);
+
+ // Helper that inserts a dbg.value before \p InsertBefore, copying the
+ // location (and possibly the type, if it's non-void) from \p TemplateInst.
+ auto insertDbgVal = [&](Instruction &TemplateInst,
+ Instruction *InsertBefore) {
+ std::string Name = utostr(NextVar++);
+ Value *V = &TemplateInst;
+ if (TemplateInst.getType()->isVoidTy())
+ V = ConstantInt::get(Int32Ty, 0);
+ const DILocation *Loc = TemplateInst.getDebugLoc().get();
+ auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(),
+ getCachedDIType(V->getType()),
+ /*AlwaysPreserve=*/true);
+ DIB.insertDbgValueIntrinsic(V, LocalVar, DIB.createExpression(), Loc,
+ InsertBefore);
+ };
+
for (BasicBlock &BB : F) {
// Attach debug locations.
for (Instruction &I : BB)
I.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
+ if (DebugifyLevel < Level::LocationsAndVariables)
+ continue;
+
// Inserting debug values into EH pads can break IR invariants.
if (BB.isEHPad())
continue;
@@ -126,25 +160,30 @@ bool applyDebugifyMetadata(Module &M,
if (!isa<PHINode>(I) && !I->isEHPad())
InsertBefore = I->getNextNode();
- std::string Name = utostr(NextVar++);
- const DILocation *Loc = I->getDebugLoc().get();
- auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(),
- getCachedDIType(I->getType()),
- /*AlwaysPreserve=*/true);
- DIB.insertDbgValueIntrinsic(I, LocalVar, DIB.createExpression(), Loc,
- InsertBefore);
+ insertDbgVal(*I, InsertBefore);
+ InsertedDbgVal = true;
}
}
+ // Make sure we emit at least one dbg.value, otherwise MachineDebugify may
+ // not have anything to work with as it goes about inserting DBG_VALUEs.
+ // (It's common for MIR tests to be written containing skeletal IR with
+ // empty functions -- we're still interested in debugifying the MIR within
+ // those tests, and this helps with that.)
+ if (DebugifyLevel == Level::LocationsAndVariables && !InsertedDbgVal) {
+ auto *Term = findTerminatingInstruction(F.getEntryBlock());
+ insertDbgVal(*Term, Term);
+ }
+ if (ApplyToMF)
+ ApplyToMF(DIB, F);
DIB.finalizeSubprogram(SP);
}
DIB.finalize();
// Track the number of distinct lines and variables.
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.debugify");
- auto *IntTy = Type::getInt32Ty(Ctx);
auto addDebugifyOperand = [&](unsigned N) {
NMD->addOperand(MDNode::get(
- Ctx, ValueAsMetadata::getConstant(ConstantInt::get(IntTy, N))));
+ Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
};
addDebugifyOperand(NextLine - 1); // Original number of lines.
addDebugifyOperand(NextVar - 1); // Original number of variables.
@@ -159,6 +198,54 @@ bool applyDebugifyMetadata(Module &M,
return true;
}
+bool llvm::stripDebugifyMetadata(Module &M) {
+ bool Changed = false;
+
+ // Remove the llvm.debugify module-level named metadata.
+ NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify");
+ if (DebugifyMD) {
+ M.eraseNamedMetadata(DebugifyMD);
+ Changed = true;
+ }
+
+ // Strip out all debug intrinsics and supporting metadata (subprograms, types,
+ // variables, etc).
+ Changed |= StripDebugInfo(M);
+
+ // Strip out the dead dbg.value prototype.
+ Function *DbgValF = M.getFunction("llvm.dbg.value");
+ if (DbgValF) {
+ assert(DbgValF->isDeclaration() && DbgValF->use_empty() &&
+ "Not all debug info stripped?");
+ DbgValF->eraseFromParent();
+ Changed = true;
+ }
+
+ // Strip out the module-level Debug Info Version metadata.
+ // FIXME: There must be an easier way to remove an operand from a NamedMDNode.
+ NamedMDNode *NMD = M.getModuleFlagsMetadata();
+ if (!NMD)
+ return Changed;
+ SmallVector<MDNode *, 4> Flags;
+ for (MDNode *Flag : NMD->operands())
+ Flags.push_back(Flag);
+ NMD->clearOperands();
+ for (MDNode *Flag : Flags) {
+ MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1));
+ if (Key && Key->getString() == "Debug Info Version") {
+ Changed = true;
+ continue;
+ }
+ NMD->addOperand(Flag);
+ }
+ // If we left it empty we might as well remove it.
+ if (NMD->getNumOperands() == 0)
+ NMD->eraseFromParent();
+
+ return Changed;
+}
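// A minimal standalone sketch of the copy-filter-rebuild workaround noted in
// the FIXME above, on a plain vector instead of a NamedMDNode: snapshot the
// operands, clear the node, and re-add everything except the flag being
// removed.
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Operands = {"Dwarf Version", "Debug Info Version",
                                       "wchar_size"};
  std::vector<std::string> Snapshot = Operands; // copy existing operands
  Operands.clear();                             // NMD->clearOperands()
  for (const std::string &Flag : Snapshot)
    if (Flag != "Debug Info Version") // skip the operand being removed
      Operands.push_back(Flag);       // NMD->addOperand(Flag)
  for (const std::string &Flag : Operands)
    std::cout << Flag << "\n"; // "Dwarf Version", "wchar_size"
}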
+
+namespace {
/// Return true if a mis-sized diagnostic is issued for \p DVI.
bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
// The size of a dbg.value's value operand should match the size of the
@@ -206,7 +293,7 @@ bool checkDebugifyMetadata(Module &M,
// Skip modules without debugify metadata.
NamedMDNode *NMD = M.getNamedMetadata("llvm.debugify");
if (!NMD) {
- dbg() << Banner << "Skipping module without debugify metadata\n";
+ dbg() << Banner << ": Skipping module without debugify metadata\n";
return false;
}
@@ -233,7 +320,7 @@ bool checkDebugifyMetadata(Module &M,
// Find missing lines.
for (Instruction &I : instructions(F)) {
- if (isa<DbgValueInst>(&I))
+ if (isa<DbgValueInst>(&I) || isa<PHINode>(&I))
continue;
auto DL = I.getDebugLoc();
@@ -243,11 +330,10 @@ bool checkDebugifyMetadata(Module &M,
}
if (!DL) {
- dbg() << "ERROR: Instruction with empty DebugLoc in function ";
+ dbg() << "WARNING: Instruction with empty DebugLoc in function ";
dbg() << F.getName() << " --";
I.print(dbg());
dbg() << "\n";
- HasErrors = true;
}
}
@@ -287,12 +373,9 @@ bool checkDebugifyMetadata(Module &M,
dbg() << " [" << NameOfWrappedPass << "]";
dbg() << ": " << (HasErrors ? "FAIL" : "PASS") << '\n';
- // Strip the Debugify Metadata if required.
- if (Strip) {
- StripDebugInfo(M);
- M.eraseNamedMetadata(NMD);
- return true;
- }
+ // Strip debugify metadata if required.
+ if (Strip)
+ return stripDebugifyMetadata(M);
return false;
}
@@ -301,7 +384,8 @@ bool checkDebugifyMetadata(Module &M,
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
- return applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ");
+ return applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
}
DebugifyModulePass() : ModulePass(ID) {}
@@ -320,7 +404,7 @@ struct DebugifyFunctionPass : public FunctionPass {
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
- "FunctionDebugify: ");
+ "FunctionDebugify: ", /*ApplyToMF*/ nullptr);
}
DebugifyFunctionPass() : FunctionPass(ID) {}
@@ -395,7 +479,8 @@ FunctionPass *createDebugifyFunctionPass() {
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
- applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ");
+ applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 651f776a4915b..f84ff9e5aad1d 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index 914babeb6829d..cae9d9ee6d709 100644
--- a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
static FunctionCallee getDefaultPersonalityFn(Module *M) {
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index ad36790b8c6a6..c5dfbf9d92d13 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -196,8 +195,7 @@ evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
Constant *const IdxList[] = {IdxZero, IdxZero};
Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
- Ptr = FoldedPtr;
+ Ptr = ConstantFoldConstant(Ptr, DL, TLI);
}
return Val;
}
@@ -266,33 +264,33 @@ static Function *getFunction(Constant *C) {
}
Function *
-Evaluator::getCalleeWithFormalArgs(CallSite &CS,
- SmallVector<Constant *, 8> &Formals) {
- auto *V = CS.getCalledValue();
+Evaluator::getCalleeWithFormalArgs(CallBase &CB,
+ SmallVectorImpl<Constant *> &Formals) {
+ auto *V = CB.getCalledOperand();
if (auto *Fn = getFunction(getVal(V)))
- return getFormalParams(CS, Fn, Formals) ? Fn : nullptr;
+ return getFormalParams(CB, Fn, Formals) ? Fn : nullptr;
auto *CE = dyn_cast<ConstantExpr>(V);
if (!CE || CE->getOpcode() != Instruction::BitCast ||
- !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals))
+ !getFormalParams(CB, getFunction(CE->getOperand(0)), Formals))
return nullptr;
return dyn_cast<Function>(
ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
}
-bool Evaluator::getFormalParams(CallSite &CS, Function *F,
- SmallVector<Constant *, 8> &Formals) {
+bool Evaluator::getFormalParams(CallBase &CB, Function *F,
+ SmallVectorImpl<Constant *> &Formals) {
if (!F)
return false;
auto *FTy = F->getFunctionType();
- if (FTy->getNumParams() > CS.getNumArgOperands()) {
+ if (FTy->getNumParams() > CB.getNumArgOperands()) {
LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
return false;
}
- auto ArgI = CS.arg_begin();
+ auto ArgI = CB.arg_begin();
for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE;
++ParI) {
auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL);
@@ -339,7 +337,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // no volatile/atomic accesses.
}
Constant *Ptr = getVal(SI->getOperand(1));
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
+ Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI);
+ if (Ptr != FoldedPtr) {
LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
@@ -448,7 +447,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
Constant *Ptr = getVal(LI->getOperand(0));
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
+ Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI);
+ if (Ptr != FoldedPtr) {
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant "
"folding: "
@@ -476,22 +476,22 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult = AllocaTmps.back().get();
LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(&*CurInst);
+ CallBase &CB = *cast<CallBase>(&*CurInst);
// Debug info can safely be ignored here.
- if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ if (isa<DbgInfoIntrinsic>(CB)) {
LLVM_DEBUG(dbgs() << "Ignoring debug info.\n");
++CurInst;
continue;
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CS.getCalledValue())) {
+ if (CB.isInlineAsm()) {
LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
return false;
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CB)) {
if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
if (MSI->isVolatile()) {
LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset "
@@ -559,7 +559,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// Resolve function pointers.
SmallVector<Constant *, 8> Formals;
- Function *Callee = getCalleeWithFormalArgs(CS, Formals);
+ Function *Callee = getCalleeWithFormalArgs(CB, Formals);
if (!Callee || Callee->isInterposable()) {
LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
return false; // Cannot resolve.
@@ -567,9 +567,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()),
- Callee, Formals, TLI)) {
- InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
+ if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
+ InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
if (!InstResult)
return false;
LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
@@ -592,7 +591,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false;
}
ValueStack.pop_back();
- InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal);
+ InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
if (RetVal && !InstResult)
return false;
@@ -648,9 +647,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
if (!CurInst->use_empty()) {
- if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI))
- InstResult = FoldedInstResult;
-
+ InstResult = ConstantFoldConstant(InstResult, DL, TLI);
setVal(&*CurInst, InstResult);
}
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
new file mode 100644
index 0000000000000..460ba9e97fc6e
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -0,0 +1,337 @@
+//===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An irreducible SCC is one which has multiple "header" blocks, i.e., blocks
+// with control-flow edges incident from outside the SCC. This pass converts an
+// irreducible SCC into a natural loop by applying the following transformation:
+//
+// 1. Collect the set of headers H of the SCC.
+// 2. Collect the set of predecessors P of these headers. These may be inside as
+// well as outside the SCC.
+// 3. Create block N and redirect every edge from set P to set H through N.
+//
+// This converts the SCC into a natural loop with N as the header: N is the only
+// block with edges incident from outside the SCC, and all backedges in the SCC
+// are incident on N, i.e., for every backedge, the head now dominates the tail.
+//
+// INPUT CFG: The blocks A and B form an irreducible loop with two headers.
+//
+// Entry
+// / \
+// v v
+// A ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// OUTPUT CFG: Edges incident on A and B are now redirected through a
+// new block N, forming a natural loop consisting of N, A and B.
+//
+// Entry
+// |
+// v
+// .---> N <---.
+// / / \ \
+// | / \ |
+// \ v v /
+// `-- A B --'
+// |
+// v
+// Exit
+//
+// The transformation is applied to every maximal SCC that is not already
+// recognized as a loop. The pass operates on all maximal SCCs found in the
+// function body outside of any loop, as well as those found inside each loop,
+// including inside any newly created loops. This ensures that any SCC hidden
+// inside a maximal SCC is also transformed.
+//
+// The actual transformation is handled by function CreateControlFlowHub, which
+// takes a set of incoming blocks (the predecessors) and outgoing blocks (the
+// headers). The function also moves every PHINode in an outgoing block to the
+// hub. Since the hub dominates all the outgoing blocks, each such PHINode
+// continues to dominate its uses. Since every header in an SCC has at least two
+// predecessors, every value used in the header (or later) but defined in a
+// predecessor (or earlier) is represented by a PHINode in a header. Hence the
+// above handling of PHINodes is sufficient and no further processing is
+// required to restore SSA.
+//
+// Limitation: The pass cannot handle switch statements and indirect
+// branches. Both must be lowered to plain branches first.
+//
+//===----------------------------------------------------------------------===//
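// A minimal standalone sketch of steps 1 and 2 above, run on a toy
// adjacency-list encoding of the INPUT example (Entry branches to A and B,
// which branch to each other): headers are SCC blocks with a predecessor
// outside the SCC, and the predecessor set collects every predecessor of a
// header, whether inside or outside the SCC.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string>> Preds = {
      {"A", {"Entry", "B"}}, {"B", {"Entry", "A"}}};
  std::set<std::string> SCC = {"A", "B"};

  std::set<std::string> Headers, Predecessors;
  for (const std::string &BB : SCC)
    for (const std::string &P : Preds[BB])
      if (!SCC.count(P))
        Headers.insert(BB); // has an edge incident from outside the SCC
  for (const std::string &H : Headers)
    for (const std::string &P : Preds[H])
      Predecessors.insert(P);

  // Every edge from {Entry, A, B} to {A, B} will be redirected through N.
  for (const std::string &H : Headers)
    std::cout << "header: " << H << "\n"; // A, B
  for (const std::string &P : Predecessors)
    std::cout << "pred: " << P << "\n"; // A, B, Entry
}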
+
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "fix-irreducible"
+
+using namespace llvm;
+
+namespace {
+struct FixIrreducible : public FunctionPass {
+ static char ID;
+ FixIrreducible() : FunctionPass(ID) {
+ initializeFixIrreduciblePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LowerSwitchID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char FixIrreducible::ID = 0;
+
+FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
+
+INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
+ "Convert irreducible control-flow into natural loops",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
+ "Convert irreducible control-flow into natural loops",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+
+// When a new loop is created, existing children of the parent loop may now be
+// fully inside the new loop. Reconnect these as children of the new loop.
+static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector()
+ : LI.getTopLevelLoopsVector();
+ // The new loop cannot be its own child, and any candidate is a
+ // child iff its header is owned by the new loop. Move all the
+ // children to a new vector.
+ auto FirstChild = std::partition(
+ CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
+ return L == NewLoop || Blocks.count(L->getHeader()) == 0;
+ });
+ SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
+ CandidateLoops.erase(FirstChild, CandidateLoops.end());
+
+ for (auto II = ChildLoops.begin(), IE = ChildLoops.end(); II != IE; ++II) {
+ auto Child = *II;
+ LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
+ << "\n");
+ // TODO: A child loop whose header is also a header in the current
+ // SCC gets destroyed since its backedges are removed. That may
+ // not be necessary if we can retain such backedges.
+ if (Headers.count(Child->getHeader())) {
+ for (auto BB : Child->blocks()) {
+ LI.changeLoopFor(BB, NewLoop);
+ LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName()
+ << "\n");
+ }
+ LI.destroy(Child);
+ LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n");
+ continue;
+ }
+
+ Child->setParentLoop(nullptr);
+ NewLoop->addChildLoop(Child);
+ LLVM_DEBUG(dbgs() << "added child loop to new loop\n");
+ }
+}
+
+// Given a set of blocks and headers in an irreducible SCC, convert it into a
+// natural loop. Also insert this new loop at its appropriate place in the
+// hierarchy of loops.
+static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
+ Loop *ParentLoop,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+#ifndef NDEBUG
+ // All headers are part of the SCC
+ for (auto H : Headers) {
+ assert(Blocks.count(H));
+ }
+#endif
+
+ SetVector<BasicBlock *> Predecessors;
+ for (auto H : Headers) {
+ for (auto P : predecessors(H)) {
+ Predecessors.insert(P);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Found predecessors:";
+ for (auto P : Predecessors) {
+ dbgs() << " " << P->getName();
+ }
+ dbgs() << "\n");
+
+ // Redirect all the backedges through a "hub" consisting of a series
+ // of guard blocks that manage the flow of control from the
+ // predecessors to the headers.
+ SmallVector<BasicBlock *, 8> GuardBlocks;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ CreateControlFlowHub(&DTU, GuardBlocks, Predecessors, Headers, "irr");
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ // Create a new loop from the now-transformed cycle
+ auto NewLoop = LI.AllocateLoop();
+ if (ParentLoop) {
+ ParentLoop->addChildLoop(NewLoop);
+ } else {
+ LI.addTopLevelLoop(NewLoop);
+ }
+
+ // Add the guard blocks to the new loop. The first guard block is
+ // the head of all the backedges, and it is the first to be inserted
+ // in the loop. This ensures that it is recognized as the
+ // header. Since the new loop is already in LoopInfo, the new blocks
+ // are also propagated up the chain of parent loops.
+ for (auto G : GuardBlocks) {
+ LLVM_DEBUG(dbgs() << "added guard block: " << G->getName() << "\n");
+ NewLoop->addBasicBlockToLoop(G, LI);
+ }
+
+ // Add the SCC blocks to the new loop.
+ for (auto BB : Blocks) {
+ NewLoop->addBlockEntry(BB);
+ if (LI.getLoopFor(BB) == ParentLoop) {
+ LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName()
+ << "\n");
+ LI.changeLoopFor(BB, NewLoop);
+ } else {
+ LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n");
+ }
+ }
+ LLVM_DEBUG(dbgs() << "header for new loop: "
+ << NewLoop->getHeader()->getName() << "\n");
+
+ reconnectChildLoops(LI, ParentLoop, NewLoop, Blocks, Headers);
+
+ NewLoop->verifyLoop();
+ if (ParentLoop) {
+ ParentLoop->verifyLoop();
+ }
+#if defined(EXPENSIVE_CHECKS)
+ LI.verify(DT);
+#endif // EXPENSIVE_CHECKS
+}
+
+namespace llvm {
+// Enable the graph traits required for traversing a Loop body.
+template <> struct GraphTraits<Loop> : LoopBodyTraits {};
+} // namespace llvm
+
+// Overloaded wrappers to go with the function template below.
+static BasicBlock *unwrapBlock(BasicBlock *B) { return B; }
+static BasicBlock *unwrapBlock(LoopBodyTraits::NodeRef &N) { return N.second; }
+
+static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Function *F,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ createNaturalLoopInternal(LI, DT, nullptr, Blocks, Headers);
+}
+
+static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Loop &L,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ createNaturalLoopInternal(LI, DT, &L, Blocks, Headers);
+}
+
+// Convert irreducible SCCs; Graph G may be a Function* or a Loop&.
+template <class Graph>
+static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
+ bool Changed = false;
+ for (auto Scc = scc_begin(G); !Scc.isAtEnd(); ++Scc) {
+ if (Scc->size() < 2)
+ continue;
+ SetVector<BasicBlock *> Blocks;
+ LLVM_DEBUG(dbgs() << "Found SCC:");
+ for (auto N : *Scc) {
+ auto BB = unwrapBlock(N);
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
+ Blocks.insert(BB);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Minor optimization: The SCC blocks are usually discovered in an order
+ // that is the opposite of the order in which these blocks appear as branch
+ // targets. This results in a lot of condition inversions in the control
+ // flow out of the new ControlFlowHub, which can be mitigated if the orders
+ // match. So we discover the headers using the reverse of the block order.
+ SetVector<BasicBlock *> Headers;
+ LLVM_DEBUG(dbgs() << "Found headers:");
+ for (auto BB : reverse(Blocks)) {
+ for (const auto P : predecessors(BB)) {
+ // Skip unreachable predecessors.
+ if (!DT.isReachableFromEntry(P))
+ continue;
+ if (!Blocks.count(P)) {
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
+ Headers.insert(BB);
+ break;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (Headers.size() == 1) {
+ assert(LI.isLoopHeader(Headers.front()));
+ LLVM_DEBUG(dbgs() << "Natural loop with a single header: skipped\n");
+ continue;
+ }
+ createNaturalLoop(LI, DT, G, Blocks, Headers);
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool FixIrreducible::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
+ << F.getName() << "\n");
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ bool Changed = false;
+ SmallVector<Loop *, 8> WorkList;
+
+ LLVM_DEBUG(dbgs() << "visiting top-level\n");
+ Changed |= makeReducible(LI, DT, &F);
+
+ // Any SCCs reduced are now already in the list of top-level loops, so simply
+ // add them all to the worklist.
+ for (auto L : LI) {
+ WorkList.push_back(L);
+ }
+
+ while (!WorkList.empty()) {
+ auto L = WorkList.back();
+ WorkList.pop_back();
+ LLVM_DEBUG(dbgs() << "visiting loop with header "
+ << L->getHeader()->getName() << "\n");
+ Changed |= makeReducible(LI, DT, *L);
+ // Any SCCs reduced are now already in the list of child loops, so simply
+ // add them all to the worklist.
+ WorkList.append(L->begin(), L->end());
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 893f23eb60482..0098dcaeb07a0 100644
--- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -45,12 +45,12 @@ class FlattenCFGOpt {
bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
/// Compare a pair of blocks: \p Block1 and \p Block2, which
- /// are from two if-regions whose entry blocks are \p Head1 and \p
- /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+ /// are from two if-regions, where \p Head2 is the entry block of the 2nd
+ /// if-region. \returns true if \p Block1 and \p Block2 contain identical
/// instructions and have no memory references that alias with \p Head2.
/// This is used as a legality check for merging if-regions.
- bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
- BasicBlock *Block1, BasicBlock *Block2);
+ bool CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
+ BasicBlock *Head2);
public:
FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
@@ -97,7 +97,7 @@ public:
/// br label %if.end;
///
/// Current implementation handles two cases.
-/// Case 1: \param BB is on the else-path.
+/// Case 1: BB is on the else-path.
///
/// BB1
/// / |
@@ -105,7 +105,7 @@ public:
/// / \ |
/// BB3 \ | where BB1, BB2 contain conditional branches.
/// \ | / BB3 contains unconditional branch.
-/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// \ | / BB4 corresponds to BB which is also the merge.
/// BB => BB4
///
///
@@ -114,14 +114,14 @@ public:
/// if (a == b && c == d)
/// statement; // BB3
///
-/// Case 2: \param BB BB is on the then-path.
+/// Case 2: BB is on the then-path.
///
/// BB1
/// / |
/// | BB2
/// \ / | where BB1, BB2 contain conditional branches.
/// BB => BB3 | BB3 contains an unconditional branch and corresponds
-/// \ / to \param BB. BB4 is the merge.
+/// \ / to BB. BB4 is the merge.
/// BB4
///
/// Corresponding source code:
@@ -129,9 +129,9 @@ public:
/// if (a == b || c == d)
/// statement; // BB3
///
-/// In both cases, \param BB is the common successor of conditional branches.
-/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
-/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// In both cases, BB is the common successor of conditional branches.
+/// In Case 1, BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, BB (BB3) only has conditional branches
/// as its predecessors.
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
PHINode *PHI = dyn_cast<PHINode>(BB->begin());
@@ -315,25 +315,16 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
return true;
}
-/// Compare blocks from two if-regions, where \param Head1 is the entry of the
-/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
-/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
-// in the 2nd if-region to compare. \returns true if \param Block1 and \param
-/// Block2 have identical instructions and do not have memory reference alias
-/// with \param Head2.
-bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
- BasicBlock *Block1,
- BasicBlock *Block2) {
+/// Compare blocks from two if-regions, where \param Head2 is the entry of the
+/// 2nd if-region. \param Block1 is a block in the 1st if-region to compare.
+/// \param Block2 is a block in the 2nd if-region to compare. \returns true if
+/// \param Block1 and \param Block2 contain identical instructions and have no
+/// memory references that alias with \param Head2.
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
+ BasicBlock *Head2) {
Instruction *PTI2 = Head2->getTerminator();
Instruction *PBI2 = &Head2->front();
- bool eq1 = (Block1 == Head1);
- bool eq2 = (Block2 == Head2);
- if (eq1 || eq2) {
- // An empty then-path or else-path.
- return (eq1 == eq2);
- }
-
// Check whether instructions in Block1 and Block2 are identical
// and do not alias with instructions in Head2.
BasicBlock::iterator iter1 = Block1->begin();
@@ -395,6 +386,29 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// To:
/// if (a || b)
/// statement;
+///
+///
+/// And from:
+/// if (a)
+/// ;
+/// else
+/// statement;
+/// if (b)
+/// ;
+/// else
+/// statement;
+///
+/// To:
+/// if (a && b)
+/// ;
+/// else
+/// statement;
+///
+/// We always adopt the form of the first if-region. If the statement in the
+/// first if-region is on the "then-path" while in the second if-region it is
+/// on the "else-path", we convert the second region to the first form by
+/// inverting its condition and branch successors. The same applies in the
+/// opposite case.
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
@@ -415,22 +429,42 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *FirstEntryBlock = CInst1->getParent();
// Either then-path or else-path should be empty.
- if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
- return false;
- if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
- return false;
+ bool InvertCond2 = false;
+ BinaryOperator::BinaryOps CombineOp;
+ if (IfFalse1 == FirstEntryBlock) {
+ // The else-path is empty, so we must use "or" operation to combine the
+ // conditions.
+ CombineOp = BinaryOperator::Or;
+ if (IfFalse2 != SecondEntryBlock) {
+ if (IfTrue2 != SecondEntryBlock)
+ return false;
- Instruction *PTI2 = SecondEntryBlock->getTerminator();
- Instruction *PBI2 = &SecondEntryBlock->front();
+ InvertCond2 = true;
+ std::swap(IfTrue2, IfFalse2);
+ }
- if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
- IfTrue2))
- return false;
+ if (!CompareIfRegionBlock(IfTrue1, IfTrue2, SecondEntryBlock))
+ return false;
+ } else if (IfTrue1 == FirstEntryBlock) {
+ // The then-path is empty, so we must use "and" operation to combine the
+ // conditions.
+ CombineOp = BinaryOperator::And;
+ if (IfTrue2 != SecondEntryBlock) {
+ if (IfFalse2 != SecondEntryBlock)
+ return false;
+
+ InvertCond2 = true;
+ std::swap(IfTrue2, IfFalse2);
+ }
- if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
- IfFalse2))
+ if (!CompareIfRegionBlock(IfFalse1, IfFalse2, SecondEntryBlock))
+ return false;
+ } else
return false;
+ Instruction *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = &SecondEntryBlock->front();
+
// Check whether \param SecondEntryBlock has side effects and is safe to
// speculate.
for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
@@ -445,12 +479,22 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
FirstEntryBlock->getInstList()
.splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
- Value *CC = PBI->getCondition();
+ assert(PBI->getCondition() == IfCond2);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Builder.SetInsertPoint(PBI);
- Value *NC = Builder.CreateOr(CInst1, CC);
- PBI->replaceUsesOfWith(CC, NC);
+ if (InvertCond2) {
+ // If the compare is only used by this branch, we can simply invert its
+ // predicate in place.
+ auto Cmp2 = dyn_cast<CmpInst>(CInst2);
+ if (Cmp2 && Cmp2->hasOneUse())
+ Cmp2->setPredicate(Cmp2->getInversePredicate());
+ else
+ CInst2 = cast<Instruction>(Builder.CreateNot(CInst2));
+ PBI->swapSuccessors();
+ }
+ Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2);
+ PBI->replaceUsesOfWith(IfCond2, NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
// Update PHI nodes to replace their predecessors with FirstEntryBlock.
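// A minimal standalone sketch of the boolean effect of the merge above: when
// the second region has the opposite shape, its condition is inverted (by
// flipping a single-use compare's predicate in place, or by inserting a not)
// and its successors are swapped, then the two conditions are combined with
// the "and"/"or" chosen from the shape of the first region.
#include <cassert>

bool mergedCondition(bool Cond1, bool Cond2, bool InvertCond2, bool UseOr) {
  if (InvertCond2)
    Cond2 = !Cond2; // setPredicate(getInversePredicate()) or CreateNot
  return UseOr ? (Cond1 || Cond2) : (Cond1 && Cond2);
}

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      assert(mergedCondition(A, B, false, true) == (A || B));  // same shape
      assert(mergedCondition(A, B, true, false) == (A && !B)); // inverted
    }
}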
@@ -496,6 +540,6 @@ bool FlattenCFGOpt::run(BasicBlock *BB) {
/// FlattenCFG - This function is used to flatten a CFG. For
/// example, it uses parallel-and and parallel-or mode to collapse
/// if-conditions and merge if-regions with identical statements.
-bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+bool llvm::FlattenCFG(BasicBlock *BB, AAResults *AA) {
return FlattenCFGOpt(AA).run(BB);
}
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index a9b28754c8e9c..101cb232d8aed 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -52,22 +51,28 @@ using namespace llvm;
#define DEBUG_TYPE "functioncomparator"
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
- if (L < R) return -1;
- if (L > R) return 1;
+ if (L < R)
+ return -1;
+ if (L > R)
+ return 1;
return 0;
}
int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
- if ((int)L < (int)R) return -1;
- if ((int)L > (int)R) return 1;
+ if ((int)L < (int)R)
+ return -1;
+ if ((int)L > (int)R)
+ return 1;
return 0;
}
int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
return Res;
- if (L.ugt(R)) return 1;
- if (R.ugt(L)) return -1;
+ if (L.ugt(R))
+ return 1;
+ if (R.ugt(L))
+ return -1;
return 0;
}
@@ -166,21 +171,17 @@ int FunctionComparator::cmpRangeMetadata(const MDNode *L,
return 0;
}
-int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
- const Instruction *R) const {
- ImmutableCallSite LCS(L);
- ImmutableCallSite RCS(R);
-
- assert(LCS && RCS && "Must be calls or invokes!");
- assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+int FunctionComparator::cmpOperandBundlesSchema(const CallBase &LCS,
+ const CallBase &RCS) const {
+ assert(LCS.getOpcode() == RCS.getOpcode() && "Can't compare otherwise!");
if (int Res =
cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
return Res;
- for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
- auto OBL = LCS.getOperandBundleAt(i);
- auto OBR = RCS.getOperandBundleAt(i);
+ for (unsigned I = 0, E = LCS.getNumOperandBundles(); I != E; ++I) {
+ auto OBL = LCS.getOperandBundleAt(I);
+ auto OBR = RCS.getOperandBundleAt(I);
if (int Res = OBL.getTagName().compare(OBR.getTagName()))
return Res;
@@ -227,9 +228,9 @@ int FunctionComparator::cmpConstants(const Constant *L,
unsigned TyRWidth = 0;
if (auto *VecTyL = dyn_cast<VectorType>(TyL))
- TyLWidth = VecTyL->getBitWidth();
+ TyLWidth = VecTyL->getPrimitiveSizeInBits().getFixedSize();
if (auto *VecTyR = dyn_cast<VectorType>(TyR))
- TyRWidth = VecTyR->getBitWidth();
+ TyRWidth = VecTyR->getPrimitiveSizeInBits().getFixedSize();
if (TyLWidth != TyRWidth)
return cmpNumbers(TyLWidth, TyRWidth);
@@ -328,8 +329,8 @@ int FunctionComparator::cmpConstants(const Constant *L,
case Value::ConstantVectorVal: {
const ConstantVector *LV = cast<ConstantVector>(L);
const ConstantVector *RV = cast<ConstantVector>(R);
- unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
- unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ unsigned NumElementsL = cast<FixedVectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<FixedVectorType>(TyR)->getNumElements();
if (int Res = cmpNumbers(NumElementsL, NumElementsR))
return Res;
for (uint64_t i = 0; i < NumElementsL; ++i) {
@@ -361,12 +362,12 @@ int FunctionComparator::cmpConstants(const Constant *L,
if (LBA->getFunction() == RBA->getFunction()) {
// They are BBs in the same function. Order by which comes first in the
// BB order of the function. This order is deterministic.
- Function* F = LBA->getFunction();
+ Function *F = LBA->getFunction();
BasicBlock *LBB = LBA->getBasicBlock();
BasicBlock *RBB = RBA->getBasicBlock();
if (LBB == RBB)
return 0;
- for(BasicBlock &BB : F->getBasicBlockList()) {
+ for (BasicBlock &BB : F->getBasicBlockList()) {
if (&BB == LBB) {
assert(&BB != RBB);
return -1;
@@ -476,14 +477,25 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
return 0;
}
- case Type::ArrayTyID:
- case Type::VectorTyID: {
- auto *STyL = cast<SequentialType>(TyL);
- auto *STyR = cast<SequentialType>(TyR);
+ case Type::ArrayTyID: {
+ auto *STyL = cast<ArrayType>(TyL);
+ auto *STyR = cast<ArrayType>(TyR);
if (STyL->getNumElements() != STyR->getNumElements())
return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
return cmpTypes(STyL->getElementType(), STyR->getElementType());
}
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
+ auto *STyL = cast<VectorType>(TyL);
+ auto *STyR = cast<VectorType>(TyR);
+ if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable)
+ return cmpNumbers(STyL->getElementCount().Scalable,
+ STyR->getElementCount().Scalable);
+ if (STyL->getElementCount().Min != STyR->getElementCount().Min)
+ return cmpNumbers(STyL->getElementCount().Min,
+ STyR->getElementCount().Min);
+ return cmpTypes(STyL->getElementType(), STyR->getElementType());
+ }
}
}
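The net ordering for vector types: scalable-ness first, then minimum lane count, then element type, so e.g. <4 x i32> sorts apart from <vscale x 4 x i32> and before <8 x i32>. A standalone restatement of the shape comparison (helper name invented):

    #include "llvm/IR/DerivedTypes.h"

    // Illustrative only: mirrors the cmpTypes vector case above.
    static int cmpVectorShape(llvm::VectorType *L, llvm::VectorType *R) {
      llvm::ElementCount LC = L->getElementCount(), RC = R->getElementCount();
      if (LC.Scalable != RC.Scalable)   // fixed vs. scalable decides first
        return LC.Scalable < RC.Scalable ? -1 : 1;
      if (LC.Min != RC.Min)             // then the minimum lane count
        return LC.Min < RC.Min ? -1 : 1;
      return 0;                         // same shape; element types decide
    }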
@@ -551,7 +563,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(LI->getSyncScopeID(),
cast<LoadInst>(R)->getSyncScopeID()))
return Res;
- return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ return cmpRangeMetadata(
+ LI->getMetadata(LLVMContext::MD_range),
cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
@@ -569,13 +582,13 @@ int FunctionComparator::cmpOperations(const Instruction *L,
}
if (const CmpInst *CI = dyn_cast<CmpInst>(L))
return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
- if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
- auto CSR = CallSite(const_cast<Instruction *>(R));
- if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
+ if (auto *CBL = dyn_cast<CallBase>(L)) {
+ auto *CBR = cast<CallBase>(R);
+ if (int Res = cmpNumbers(CBL->getCallingConv(), CBR->getCallingConv()))
return Res;
- if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
+ if (int Res = cmpAttrs(CBL->getAttributes(), CBR->getAttributes()))
return Res;
- if (int Res = cmpOperandBundlesSchema(L, R))
+ if (int Res = cmpOperandBundlesSchema(*CBL, *CBR))
return Res;
if (const CallInst *CI = dyn_cast<CallInst>(L))
if (int Res = cmpNumbers(CI->getTailCallKind(),
@@ -616,8 +629,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(CXI->isVolatile(),
cast<AtomicCmpXchgInst>(R)->isVolatile()))
return Res;
- if (int Res = cmpNumbers(CXI->isWeak(),
- cast<AtomicCmpXchgInst>(R)->isWeak()))
+ if (int Res =
+ cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak()))
return Res;
if (int Res =
cmpOrderings(CXI->getSuccessOrdering(),
@@ -638,11 +651,21 @@ int FunctionComparator::cmpOperations(const Instruction *L,
cast<AtomicRMWInst>(R)->isVolatile()))
return Res;
if (int Res = cmpOrderings(RMWI->getOrdering(),
- cast<AtomicRMWInst>(R)->getOrdering()))
+ cast<AtomicRMWInst>(R)->getOrdering()))
return Res;
return cmpNumbers(RMWI->getSyncScopeID(),
cast<AtomicRMWInst>(R)->getSyncScopeID());
}
+ if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(L)) {
+ ArrayRef<int> LMask = SVI->getShuffleMask();
+ ArrayRef<int> RMask = cast<ShuffleVectorInst>(R)->getShuffleMask();
+ if (int Res = cmpNumbers(LMask.size(), RMask.size()))
+ return Res;
+ for (size_t i = 0, e = LMask.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LMask[i], RMask[i]))
+ return Res;
+ }
+ }
if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
const PHINode *PNR = cast<PHINode>(R);
// Ensure that in addition to the incoming values being identical
@@ -675,8 +698,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
GEPR->accumulateConstantOffset(DL, OffsetR))
return cmpAPInts(OffsetL, OffsetR);
- if (int Res = cmpTypes(GEPL->getSourceElementType(),
- GEPR->getSourceElementType()))
+ if (int Res =
+ cmpTypes(GEPL->getSourceElementType(), GEPR->getSourceElementType()))
return Res;
if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
@@ -829,8 +852,8 @@ int FunctionComparator::compareSignature() const {
// Visit the arguments so that they get enumerated in the order they're
// passed in.
for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
- ArgRI = FnR->arg_begin(),
- ArgLE = FnL->arg_end();
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
if (cmpValues(&*ArgLI, &*ArgRI) != 0)
llvm_unreachable("Arguments repeat!");
@@ -897,9 +920,7 @@ public:
// Initialize to random constant, so the state isn't zero.
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
- void add(uint64_t V) {
- Hash = hashing::detail::hash_16_bytes(Hash, V);
- }
+ void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
// No finishing is required, because the entire hash value is used.
uint64_t getHash() { return Hash; }
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 26d48ee0d23fa..8df7ae9563d8a 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -212,13 +212,6 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
}
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
- if (VI && VI.isDSOLocal()) {
- GV.setDSOLocal(true);
- if (GV.hasDLLImportStorageClass())
- GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);
- }
}
// We should always have a ValueInfo (i.e. GV in index) for definitions when
@@ -280,6 +273,20 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
} else
GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
+ // When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is
+ // converted to a declaration, to disable direct access. Don't do this if GV
+ // is implicitly dso_local due to a non-default visibility.
+ if (ClearDSOLocalOnDeclarations && GV.isDeclarationForLinker() &&
+ !GV.isImplicitDSOLocal()) {
+ GV.setDSOLocal(false);
+ } else if (VI && VI.isDSOLocal()) {
+ // If all summaries are dso_local, symbol gets resolved to a known local
+ // definition.
+ GV.setDSOLocal(true);
+ if (GV.hasDLLImportStorageClass())
+ GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);
+ }
+
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
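A minimal caller-side sketch of the widened entry point (wrapper name invented; the new parameter appears in the next hunk): passing true for ClearDSOLocalOnDeclarations strips dso_local from symbols that became declarations for this link, so references go through the usual import indirection.

    #include "llvm/Transforms/Utils/FunctionImportUtils.h"

    static bool renameForThinLTO(llvm::Module &M,
                                 const llvm::ModuleSummaryIndex &Index) {
      return llvm::renameModuleForThinLTO(
          M, Index, /*ClearDSOLocalOnDeclarations=*/true,
          /*GlobalsToImport=*/nullptr);
    }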
@@ -319,7 +326,9 @@ bool FunctionImportGlobalProcessing::run() {
}
bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
+ bool ClearDSOLocalOnDeclarations,
SetVector<GlobalValue *> *GlobalsToImport) {
- FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
+ FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport,
+ ClearDSOLocalOnDeclarations);
return ThinLTOProcessing.run();
}
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index a2942869130d5..fe58f0e0fe400 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -9,7 +9,6 @@
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
@@ -164,8 +163,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
- } else if (auto C = ImmutableCallSite(I)) {
- if (!C.isCallee(&U))
+ } else if (const auto *CB = dyn_cast<CallBase>(I)) {
+ if (!CB->isCallee(&U))
return true;
GS.IsLoaded = true;
} else {
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 9192e74b9ace9..9d8f59d62d6d0 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -13,8 +13,12 @@
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -30,40 +34,6 @@ STATISTIC(NumVFDeclAdded,
STATISTIC(NumCompUsedAdded,
"Number of `@llvm.compiler.used` operands that have been added.");
-/// Helper function to map the TLI name to a strings that holds
-/// scalar-to-vector mapping.
-///
-/// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
-///
-/// where:
-///
-/// <isa> = "_LLVM_"
-/// <mask> = "N". Note: TLI does not support masked interfaces.
-/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
-/// field of the `VecDesc` struct.
-/// <vparams> = "v", as many as are the number of parameters of CI.
-/// <scalarname> = the name of the scalar function called by CI.
-/// <vectorname> = the name of the vector function mapped by the TLI.
-static std::string mangleTLIName(StringRef VectorName, const CallInst &CI,
- unsigned VF) {
- SmallString<256> Buffer;
- llvm::raw_svector_ostream Out(Buffer);
- Out << "_ZGV" << VFABI::_LLVM_ << "N" << VF;
- for (unsigned I = 0; I < CI.getNumArgOperands(); ++I)
- Out << "v";
- Out << "_" << CI.getCalledFunction()->getName() << "(" << VectorName << ")";
- return Out.str();
-}
-
-/// A helper function for converting Scalar types to vector types.
-/// If the incoming type is void, we return void. If the VF is 1, we return
-/// the scalar type.
-static Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
- if (Scalar->isVoidTy() || VF == 1)
- return Scalar;
- return VectorType::get(Scalar, {VF, isScalable});
-}
-
/// A helper function that adds the vector function declaration that
/// vectorizes the CallInst CI with a vectorization factor of VF
/// lanes. The TLI assumes that all parameters and the return type of
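The mangling scheme itself survives; it now lives behind VFABI::mangleTLIVectorName, used in the next hunk. A hedged sketch of its output ("vsinf4" is a made-up vector-library routine): a one-argument scalar sinf mapped at VF = 4 yields the string _ZGV_LLVM_N4v_sinf(vsinf4).

    #include "llvm/Analysis/VectorUtils.h"

    // Illustrative only; returns "_ZGV_LLVM_N4v_sinf(vsinf4)".
    static std::string exampleVariant() {
      return llvm::VFABI::mangleTLIVectorName("vsinf4", "sinf",
                                              /*numArgs=*/1, /*VF=*/4);
    }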
@@ -107,7 +77,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
if (CI.isNoBuiltin() || !CI.getCalledFunction())
return;
- const std::string ScalarName = CI.getCalledFunction()->getName();
+ const std::string ScalarName = std::string(CI.getCalledFunction()->getName());
// Nothing to be done if the TLI thinks the function is not
// vectorizable.
if (!TLI.isFunctionVectorizable(ScalarName))
@@ -120,9 +90,11 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
// All VFs in the TLI are powers of 2.
for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF;
VF *= 2) {
- const std::string TLIName = TLI.getVectorizedFunction(ScalarName, VF);
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
if (!TLIName.empty()) {
- std::string MangledName = mangleTLIName(TLIName, CI, VF);
+ std::string MangledName = VFABI::mangleTLIVectorName(
+ TLIName, ScalarName, CI.getNumArgOperands(), VF);
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
@@ -168,6 +140,12 @@ void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LoopAccessLegacyAnalysis>();
+ AU.addPreserved<DemandedBitsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
////////////////////////////////////////////////////////////////////////////////
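The observable effect of the pass: a vectorizable call site carries its mappings in the "vector-function-abi-variant" call-site attribute, and the module gains matching vector declarations. A small readback sketch (helper name invented; assumes the pass already ran over the function containing CI):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/VectorUtils.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"

    // Prints each _ZGV... variant string attached to the call, if any.
    static void dumpVariants(const llvm::CallInst &CI) {
      llvm::SmallVector<std::string, 4> Mappings;
      llvm::VFABI::getVectorVariantNames(CI, Mappings);
      for (const std::string &S : Mappings)
        llvm::errs() << S << "\n";
    }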
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 6da612eb4e658..b0b7ca4847980 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -34,7 +34,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -60,6 +59,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -79,16 +79,23 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
cl::Hidden,
cl::desc("Convert noalias attributes to metadata during inlining."));
+// Disabled by default, because the added alignment assumptions may increase
+// compile-time and block optimizations. This option is not suitable for use
+// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
- cl::init(true), cl::Hidden,
+ cl::init(false), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
-}
+static cl::opt<bool> UpdateReturnAttributes(
+ "update-return-attrs", cl::init(true), cl::Hidden,
+ cl::desc("Update return attributes on calls within inlined body"));
+
+static cl::opt<unsigned> InlinerAttributeWindow(
+ "max-inst-checked-for-throw-during-inlining", cl::Hidden,
+ cl::desc("the maximum number of instructions analyzed for may throw during "
+ "attribute inference in inlined body"),
+ cl::init(4));
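These knobs are ordinary cl::opts, so they can be flipped on the opt command line when experimenting, e.g. (sketch; the pass spelling depends on the pipeline in use) opt -passes='cgscc(inline)' -update-return-attrs=false -S in.ll, while -preserve-alignment-assumptions-during-inlining=true turns the alignment assumptions back on despite the new default.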
namespace {
@@ -530,7 +537,7 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
+ if (!CI || CI->doesNotThrow() || CI->isInlineAsm())
continue;
// We do not need to (and in fact, cannot) convert possibly throwing calls
@@ -767,12 +774,10 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
/// When inlining a call site that has !llvm.mem.parallel_loop_access or
/// llvm.access.group metadata, that metadata should be propagated to all
/// memory-accessing cloned instructions.
-static void PropagateParallelLoopAccessMetadata(CallSite CS,
+static void PropagateParallelLoopAccessMetadata(CallBase &CB,
ValueToValueMapTy &VMap) {
- MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- MDNode *CallAccessGroup =
- CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
+ MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
if (!M && !CallAccessGroup)
return;
@@ -810,8 +815,8 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have
/// call-site-specific control dependencies).
-static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
- const Function *CalledFunc = CS.getCalledFunction();
+static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
+ const Function *CalledFunc = CB.getCalledFunction();
SetVector<const MDNode *> MD;
// Note: We could only clone the metadata if it is already used in the
@@ -886,13 +891,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// If the call site also had alias scope metadata (a list of scopes to
// which instructions inside it might belong), propagate those scopes to
// the inlined instructions.
- if (MDNode *CSM =
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope))
NewMD = MDNode::concatenate(NewMD, CSM);
NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
} else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope))
NI->setMetadata(LLVMContext::MD_alias_scope, M);
}
@@ -901,12 +904,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// If the call site also had noalias metadata (a list of scopes with
// which instructions inside it don't alias), propagate those scopes to
// the inlined instructions.
- if (MDNode *CSM =
- CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias))
NewMD = MDNode::concatenate(NewMD, CSM);
NI->setMetadata(LLVMContext::MD_noalias, NewMD);
} else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias))
NI->setMetadata(LLVMContext::MD_noalias, M);
}
}
@@ -916,16 +918,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
-static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
+static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
const DataLayout &DL, AAResults *CalleeAAR) {
if (!EnableNoAliasConversion)
return;
- const Function *CalledFunc = CS.getCalledFunction();
+ const Function *CalledFunc = CB.getCalledFunction();
SmallVector<const Argument *, 4> NoAliasArgs;
for (const Argument &Arg : CalledFunc->args())
- if (Arg.hasNoAliasAttr() && !Arg.use_empty())
+ if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
NoAliasArgs.push_back(&Arg);
if (NoAliasArgs.empty())
@@ -951,7 +953,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
const Argument *A = NoAliasArgs[i];
- std::string Name = CalledFunc->getName();
+ std::string Name = std::string(CalledFunc->getName());
if (A->hasName()) {
Name += ": %";
Name += A->getName();
@@ -1002,8 +1004,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
IsFuncCall = true;
if (CalleeAAR) {
FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
- if (MRB == FMRB_OnlyAccessesArgumentPointees ||
- MRB == FMRB_OnlyReadsArgumentPointees)
+ if (AAResults::onlyAccessesArgPointees(MRB))
IsArgMemOnlyCall = true;
}
@@ -1059,7 +1060,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
// completely describe the aliasing properties using alias.scope
// metadata (and, thus, won't add any).
if (const Argument *A = dyn_cast<Argument>(V)) {
- if (!A->hasNoAliasAttr())
+ if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
UsesAliasingPtr = true;
} else {
UsesAliasingPtr = true;
@@ -1136,37 +1137,128 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
}
}
+static bool MayContainThrowingOrExitingCall(Instruction *Begin,
+ Instruction *End) {
+
+ assert(Begin->getParent() == End->getParent() &&
+ "Expected to be in same basic block!");
+ unsigned NumInstChecked = 0;
+ // Check that all instructions in the range [Begin, End) are guaranteed to
+ // transfer execution to successor.
+ for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
+ if (NumInstChecked++ > InlinerAttributeWindow ||
+ !isGuaranteedToTransferExecutionToSuccessor(&I))
+ return true;
+ return false;
+}
+
+static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
+
+ AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex);
+ if (AB.empty())
+ return AB;
+ AttrBuilder Valid;
+  // Only allow these whitelisted attributes to be propagated back to the
+ // callee. This is because other attributes may only be valid on the call
+ // itself, i.e. attributes such as signext and zeroext.
+ if (auto DerefBytes = AB.getDereferenceableBytes())
+ Valid.addDereferenceableAttr(DerefBytes);
+ if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
+ Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
+ if (AB.contains(Attribute::NoAlias))
+ Valid.addAttribute(Attribute::NoAlias);
+ if (AB.contains(Attribute::NonNull))
+ Valid.addAttribute(Attribute::NonNull);
+ return Valid;
+}
+
+static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
+ if (!UpdateReturnAttributes)
+ return;
+
+ AttrBuilder Valid = IdentifyValidAttributes(CB);
+ if (Valid.empty())
+ return;
+ auto *CalledFunction = CB.getCalledFunction();
+ auto &Context = CalledFunction->getContext();
+
+ for (auto &BB : *CalledFunction) {
+ auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!RI || !isa<CallBase>(RI->getOperand(0)))
+ continue;
+ auto *RetVal = cast<CallBase>(RI->getOperand(0));
+ // Sanity check that the cloned RetVal exists and is a call, otherwise we
+ // cannot add the attributes on the cloned RetVal.
+ // Simplification during inlining could have transformed the cloned
+ // instruction.
+ auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
+ if (!NewRetVal)
+ continue;
+ // Backward propagation of attributes to the returned value may be incorrect
+ // if it is control flow dependent.
+ // Consider:
+ // @callee {
+ // %rv = call @foo()
+ // %rv2 = call @bar()
+ // if (%rv2 != null)
+ // return %rv2
+ // if (%rv == null)
+ // exit()
+ // return %rv
+ // }
+ // caller() {
+ // %val = call nonnull @callee()
+ // }
+    // Here we cannot add the nonnull attribute on either foo or bar. So, we
+    // only proceed when RetVal and RI are in the same basic block and there
+    // are no throwing/exiting instructions between them.
+ if (RI->getParent() != RetVal->getParent() ||
+ MayContainThrowingOrExitingCall(RetVal, RI))
+ continue;
+ // Add to the existing attributes of NewRetVal, i.e. the cloned call
+ // instruction.
+ // NB! When we have the same attribute already existing on NewRetVal, but
+ // with a differing value, the AttributeList's merge API honours the already
+ // existing attribute value (i.e. attributes such as dereferenceable,
+ // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
+ AttributeList AL = NewRetVal->getAttributes();
+ AttributeList NewAL =
+ AL.addAttributes(Context, AttributeList::ReturnIndex, Valid);
+ NewRetVal->setAttributes(NewAL);
+ }
+}
+
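Net effect as a hedged before/after sketch (IR names invented). Given a callee whose return value comes straight from a call,

      define i8* @callee() {
        %rv = call i8* @foo()
        ret i8* %rv
      }

a call site %p = call nonnull i8* @callee(), once inlined, leaves behind %rv.i = call nonnull i8* @foo(): the call site's return attributes are re-attached to the cloned call that produces the returned value, subject to the same-block and no-throw checks above.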
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
-static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
+static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
return;
- AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
- auto &DL = CS.getCaller()->getParent()->getDataLayout();
+ AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
+ auto &DL = CB.getCaller()->getParent()->getDataLayout();
// To avoid inserting redundant assumptions, we should check for assumptions
// already in the caller. To do this, we might need a DT of the caller.
DominatorTree DT;
bool DTCalculated = false;
- Function *CalledFunc = CS.getCalledFunction();
+ Function *CalledFunc = CB.getCalledFunction();
for (Argument &Arg : CalledFunc->args()) {
unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
- if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
+ if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) {
if (!DTCalculated) {
- DT.recalculate(*CS.getCaller());
+ DT.recalculate(*CB.getCaller());
DTCalculated = true;
}
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
- Value *ArgVal = CS.getArgument(Arg.getArgNo());
- if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
+ Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
+ if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= Align)
continue;
- CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
- .CreateAlignmentAssumption(DL, ArgVal, Align);
+ CallInst *NewAsmp =
+ IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align);
AC->registerAssumption(NewAsmp);
}
}
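For reference, CreateAlignmentAssumption expands to roughly the following pattern for a 16-byte claim (value names invented), which is exactly the extra IR the comment near the option cites as a reason to default it off:

      %ptrint    = ptrtoint i8* %arg to i64
      %maskedptr = and i64 %ptrint, 15
      %maskcond  = icmp eq i64 %maskedptr, 0
      call void @llvm.assume(i1 %maskcond)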
@@ -1176,13 +1268,13 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
/// update the specified callgraph to reflect the changes we made.
/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
-static void UpdateCallGraphAfterInlining(CallSite CS,
+static void UpdateCallGraphAfterInlining(CallBase &CB,
Function::iterator FirstNewBlock,
ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
- const Function *Caller = CS.getCaller();
- const Function *Callee = CS.getCalledFunction();
+ const Function *Caller = CB.getCaller();
+ const Function *Callee = CB.getCalledFunction();
CallGraphNode *CalleeNode = CG[Callee];
CallGraphNode *CallerNode = CG[Caller];
@@ -1199,7 +1291,11 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
}
for (; I != E; ++I) {
- const Value *OrigCall = I->first;
+    // Skip 'reference' call records.
+ if (!I->first)
+ continue;
+
+ const Value *OrigCall = *I->first;
ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
@@ -1240,7 +1336,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
- CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
+ CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
}
static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
@@ -1254,8 +1350,8 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
- Builder.CreateMemCpy(Dst, /*DstAlign*/ Align::None(), Src,
- /*SrcAlign*/ Align::None(), Size);
+ Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
+ /*SrcAlign*/ Align(1), Size);
}
/// When inlining a call site that has a byval argument,
@@ -1281,12 +1377,12 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
return Arg;
AssumptionCache *AC =
- IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
- ByValAlignment)
+ if (getOrEnforceKnownAlignment(Arg, Align(ByValAlignment), DL, TheCall,
+ AC) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -1356,34 +1452,6 @@ static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
IA);
}
-/// Returns the LoopID for a loop which has has been cloned from another
-/// function for inlining with the new inlined-at start and end locs.
-static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
- LLVMContext &Ctx,
- DenseMap<const MDNode *, MDNode *> &IANodes) {
- assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
- "Loop ID needs at least one operand");
- assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
- "Loop ID should refer to itself");
-
- // Save space for the self-referential LoopID.
- SmallVector<Metadata *, 4> MDs = {nullptr};
-
- for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
- Metadata *MD = OrigLoopId->getOperand(i);
- // Update the DILocations to encode the inlined-at metadata.
- if (DILocation *DL = dyn_cast<DILocation>(MD))
- MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
- else
- MDs.push_back(MD);
- }
-
- MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
- // Insert the self-referential LoopID.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- return NewLoopID;
-}
-
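For reference, the self-referential LoopID shape this helper maintained, now handled by the updateLoopMetadataDebugLocations callback further down (metadata numbers invented):

      br i1 %cond, label %loop, label %exit, !llvm.loop !0
      ...
      !0 = distinct !{!0, !1}   ; operand 0 refers back to the node itself
      !1 = !{!"llvm.loop.unroll.disable"}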
/// Update inlined instructions' line numbers to
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1415,11 +1483,11 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI != BE; ++BI) {
// Loop metadata needs to be updated so that the start and end locs
// reference inlined-at locations.
- if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
- MDNode *NewLoopID =
- inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
- BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
- }
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes](
+ const DILocation &Loc) -> DILocation * {
+ return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get();
+ };
+ updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
if (!NoInlineLineTables)
if (DebugLoc DL = BI->getDebugLoc()) {
@@ -1498,8 +1566,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
/// Update the branch metadata for cloned call instructions.
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const ProfileCount &CalleeEntryCount,
- const Instruction *TheCall,
- ProfileSummaryInfo *PSI,
+ const CallBase &TheCall, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *CallerBFI) {
if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
CalleeEntryCount.getCount() < 1)
@@ -1557,31 +1624,29 @@ void llvm::updateProfileCallee(
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
-llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR,
bool InsertLifetime,
Function *ForwardVarArgsTo) {
- Instruction *TheCall = CS.getInstruction();
- assert(TheCall->getParent() && TheCall->getFunction()
- && "Instruction not in function!");
+ assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
// FIXME: we don't inline callbr yet.
- if (isa<CallBrInst>(TheCall))
- return false;
+ if (isa<CallBrInst>(CB))
+ return InlineResult::failure("We don't inline callbr yet.");
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
- Function *CalledFunc = CS.getCalledFunction();
+ Function *CalledFunc = CB.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration()) // call!
- return "external or indirect";
+ return InlineResult::failure("external or indirect");
// The inliner does not know how to inline through calls with operand bundles
// in general ...
- if (CS.hasOperandBundles()) {
- for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
- uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
+ if (CB.hasOperandBundles()) {
+ for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
+ uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
// ... but it knows how to inline through "deopt" operand bundles ...
if (Tag == LLVMContext::OB_deopt)
continue;
@@ -1589,15 +1654,15 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (Tag == LLVMContext::OB_funclet)
continue;
- return "unsupported operand bundle";
+ return InlineResult::failure("unsupported operand bundle");
}
}
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
- bool MarkNoUnwind = CS.doesNotThrow();
+ bool MarkNoUnwind = CB.doesNotThrow();
- BasicBlock *OrigBB = TheCall->getParent();
+ BasicBlock *OrigBB = CB.getParent();
Function *Caller = OrigBB->getParent();
// GC poses two hazards to inlining, which only occur when the callee has GC:
@@ -1608,7 +1673,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!Caller->hasGC())
Caller->setGC(CalledFunc->getGC());
else if (CalledFunc->getGC() != Caller->getGC())
- return "incompatible GC";
+ return InlineResult::failure("incompatible GC");
}
// Get the personality function from the callee if it contains a landing pad.
@@ -1632,7 +1697,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// TODO: This isn't 100% true. Some personality functions are proper
// supersets of others and can be used in place of the other.
else if (CalledPersonality != CallerPersonality)
- return "incompatible personality";
+ return InlineResult::failure("incompatible personality");
}
// We need to figure out which funclet the callsite was in so that we may
@@ -1642,7 +1707,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
EHPersonality Personality = classifyEHPersonality(CallerPersonality);
if (isScopedEHPersonality(Personality)) {
Optional<OperandBundleUse> ParentFunclet =
- CS.getOperandBundle(LLVMContext::OB_funclet);
+ CB.getOperandBundle(LLVMContext::OB_funclet);
if (ParentFunclet)
CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
@@ -1657,7 +1722,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// for catchpads.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
- return "catch in cleanup funclet";
+ return InlineResult::failure("catch in cleanup funclet");
}
}
} else if (isAsynchronousEHPersonality(Personality)) {
@@ -1665,7 +1730,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// funclet in the callee.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (CalledBB.isEHPad())
- return "SEH in cleanup funclet";
+ return InlineResult::failure("SEH in cleanup funclet");
}
}
}
@@ -1675,7 +1740,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Determine if we are dealing with a call in an EHPad which does not unwind
// to caller.
bool EHPadForCallUnwindsLocally = false;
- if (CallSiteEHPad && CS.isCall()) {
+ if (CallSiteEHPad && isa<CallInst>(CB)) {
UnwindDestMemoTy FuncletUnwindMap;
Value *CallSiteUnwindDestToken =
getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
@@ -1704,7 +1769,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Calculate the vector of arguments to pass into the function cloner, which
// matches up the formal to the actual argument values.
- CallSite::arg_iterator AI = CS.arg_begin();
+ auto AI = CB.arg_begin();
unsigned ArgNo = 0;
for (Function::arg_iterator I = CalledFunc->arg_begin(),
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
@@ -1714,8 +1779,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CS.isByValArgument(ArgNo)) {
- ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ if (CB.isByValArgument(ArgNo)) {
+ ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo));
if (ActualArg != *AI)
ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
@@ -1724,10 +1789,17 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
VMap[&*I] = ActualArg;
}
+ // TODO: Remove this when users have been updated to the assume bundles.
// Add alignment assumptions if necessary. We do this before the inlined
// instructions are actually cloned into the caller so that we can easily
// check what will be known at the start of the inlined code.
- AddAlignmentAssumptions(CS, IFI);
+ AddAlignmentAssumptions(CB, IFI);
+
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
+
+  /// Preserve all attributes of the call and its parameters.
+ salvageKnowledge(&CB, AC);
// We want the inliner to prune the code as it copies. We would LOVE to
// have no dead or constant instructions leftover after inlining occurs
@@ -1735,7 +1807,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, TheCall);
+ &InlinedFunctionInfo, &CB);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -1744,7 +1816,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
IFI.PSI, IFI.CallerBFI);
// Inject byval arguments initialization.
@@ -1753,21 +1825,22 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
&*FirstNewBlock, IFI);
Optional<OperandBundleUse> ParentDeopt =
- CS.getOperandBundle(LLVMContext::OB_deopt);
+ CB.getOperandBundle(LLVMContext::OB_deopt);
if (ParentDeopt) {
SmallVector<OperandBundleDef, 2> OpDefs;
for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
- Instruction *I = dyn_cast_or_null<Instruction>(VH);
- if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
+ CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
+ if (!ICS)
+ continue; // instruction was DCE'd or RAUW'ed to undef
OpDefs.clear();
- CallSite ICS(I);
- OpDefs.reserve(ICS.getNumOperandBundles());
+ OpDefs.reserve(ICS->getNumOperandBundles());
- for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
- auto ChildOB = ICS.getOperandBundleAt(i);
+ for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
+ ++COBi) {
+ auto ChildOB = ICS->getOperandBundleAt(COBi);
if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
// If the inlined call has other operand bundles, let them be
OpDefs.emplace_back(ChildOB);
@@ -1791,51 +1864,48 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
}
- Instruction *NewI = nullptr;
- if (isa<CallInst>(I))
- NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
- else if (isa<CallBrInst>(I))
- NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
- else
- NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+ Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS);
// Note: the RAUW does the appropriate fixup in VMap, so we need to do
// this even if the call returns void.
- I->replaceAllUsesWith(NewI);
+ ICS->replaceAllUsesWith(NewI);
VH = nullptr;
- I->eraseFromParent();
+ ICS->eraseFromParent();
}
}
// Update the callgraph if requested.
if (IFI.CG)
- UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+ UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI);
// For 'nodebug' functions, the associated DISubprogram is always null.
// Conservatively avoid propagating the callsite debug location to
// instructions inlined from a function whose DISubprogram is not null.
- fixupLineNumbers(Caller, FirstNewBlock, TheCall,
+ fixupLineNumbers(Caller, FirstNewBlock, &CB,
CalledFunc->getSubprogram() != nullptr);
// Clone existing noalias metadata if necessary.
- CloneAliasScopeMetadata(CS, VMap);
+ CloneAliasScopeMetadata(CB, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
+ AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
+
+ // Clone return attributes on the callsite into the calls within the inlined
+ // function which feed into its return value.
+ AddReturnAttributes(CB, VMap);
// Propagate llvm.mem.parallel_loop_access if necessary.
- PropagateParallelLoopAccessMetadata(CS, VMap);
+ PropagateParallelLoopAccessMetadata(CB, VMap);
// Register any cloned assumptions.
if (IFI.GetAssumptionCache)
for (BasicBlock &NewBlock :
make_range(FirstNewBlock->getIterator(), Caller->end()))
- for (Instruction &I : NewBlock) {
+ for (Instruction &I : NewBlock)
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::assume)
- (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II);
- }
+ IFI.GetAssumptionCache(*Caller).registerAssumption(II);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -1877,24 +1947,20 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Caller->getEntryBlock().getInstList().splice(
InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
}
- // Move any dbg.declares describing the allocas into the entry basic block.
- DIBuilder DIB(*Caller->getParent());
- for (auto &AI : IFI.StaticAllocas)
- replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
}
SmallVector<Value*,4> VarArgsToForward;
SmallVector<AttributeSet, 4> VarArgsAttrs;
for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
- i < CS.getNumArgOperands(); i++) {
- VarArgsToForward.push_back(CS.getArgOperand(i));
- VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i));
+ i < CB.getNumArgOperands(); i++) {
+ VarArgsToForward.push_back(CB.getArgOperand(i));
+ VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i));
}
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
if (InlinedFunctionInfo.ContainsCalls) {
CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
- if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+ if (CallInst *CI = dyn_cast<CallInst>(&CB))
CallSiteTailKind = CI->getTailCallKind();
// For inlining purposes, the "notail" marker is the same as no marker.
@@ -2056,7 +2122,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// any call instructions into invoke instructions. This is sensitive to which
// funclet pads were top-level in the inlinee, so must be done before
// rewriting the "parent pad" links.
- if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (auto *II = dyn_cast<InvokeInst>(&CB)) {
BasicBlock *UnwindDest = II->getUnwindDest();
Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
if (isa<LandingPadInst>(FirstNonPHI)) {
@@ -2077,31 +2143,24 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add bundle operands to any top-level call sites.
SmallVector<OperandBundleDef, 1> OpBundles;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
- Instruction *I = &*BBI++;
- CallSite CS(I);
- if (!CS)
+ CallBase *I = dyn_cast<CallBase>(&*BBI++);
+ if (!I)
continue;
// Skip call sites which are nounwind intrinsics.
auto *CalledFn =
- dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+ dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow())
continue;
// Skip call sites which already have a "funclet" bundle.
- if (CS.getOperandBundle(LLVMContext::OB_funclet))
+ if (I->getOperandBundle(LLVMContext::OB_funclet))
continue;
- CS.getOperandBundlesAsDefs(OpBundles);
+ I->getOperandBundlesAsDefs(OpBundles);
OpBundles.emplace_back("funclet", CallSiteEHPad);
- Instruction *NewInst;
- if (CS.isCall())
- NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
- else if (CS.isCallBr())
- NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
- else
- NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
+ Instruction *NewInst = CallBase::Create(I, OpBundles, I);
NewInst->takeName(I);
I->replaceAllUsesWith(NewInst);
I->eraseFromParent();
@@ -2138,7 +2197,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// caller (but terminate it instead). If the caller's return type does not
// match the callee's return type, we also need to change the return type of
// the intrinsic.
- if (Caller->getReturnType() == TheCall->getType()) {
+ if (Caller->getReturnType() == CB.getType()) {
auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
});
@@ -2197,7 +2256,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (InlinedMustTailCalls) {
// Check if we need to bitcast the result of any musttail calls.
Type *NewRetTy = Caller->getReturnType();
- bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+ bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
// Handle the returns preceded by musttail calls separately.
SmallVector<ReturnInst *, 8> NormalReturns;
@@ -2237,8 +2296,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (BasicBlock &NewBB :
make_range(FirstNewBlock->getIterator(), Caller->end()))
for (Instruction &I : NewBB)
- if (auto CS = CallSite(&I))
- IFI.InlinedCallSites.push_back(CS);
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ IFI.InlinedCallSites.push_back(CB);
}
// If we cloned in _exactly one_ basic block, and if that block ends in a
@@ -2246,36 +2305,35 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the calling basic block.
if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
// Move all of the instructions right before the call.
- OrigBB->getInstList().splice(TheCall->getIterator(),
- FirstNewBlock->getInstList(),
+ OrigBB->getInstList().splice(CB.getIterator(), FirstNewBlock->getInstList(),
FirstNewBlock->begin(), FirstNewBlock->end());
// Remove the cloned basic block.
Caller->getBasicBlockList().pop_back();
// If the call site was an invoke instruction, add a branch to the normal
// destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB);
NewBr->setDebugLoc(Returns[0]->getDebugLoc());
}
// If the return instruction returned a value, replace uses of the call with
// uses of the returned value.
- if (!TheCall->use_empty()) {
+ if (!CB.use_empty()) {
ReturnInst *R = Returns[0];
- if (TheCall == R->getReturnValue())
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ if (&CB == R->getReturnValue())
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
else
- TheCall->replaceAllUsesWith(R->getReturnValue());
+ CB.replaceAllUsesWith(R->getReturnValue());
}
// Since we are now done with the Call/Invoke, we can delete it.
- TheCall->eraseFromParent();
+ CB.eraseFromParent();
// Since we are now done with the return instruction, delete it also.
Returns[0]->eraseFromParent();
// We are now done with the inlining.
- return true;
+ return InlineResult::success();
}
// Otherwise, we have the normal case, of more than one block to inline or
@@ -2286,10 +2344,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
BranchInst *CreatedBranchToNormalDest = nullptr;
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
// Add an unconditional branch to make this look like the CallInst case...
- CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB);
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
@@ -2298,11 +2356,11 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
CalledFunc->getName() + ".exit");
- } else { // It's a call
+ } else { // It's a call
// If this is a call instruction, we need to split the basic block that
// the call lives in.
//
- AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
+ AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
CalledFunc->getName() + ".exit");
}
@@ -2335,12 +2393,12 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
- if (!TheCall->use_empty()) {
- PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ if (!CB.use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), CB.getName(),
&AfterCallBB->front());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
- TheCall->replaceAllUsesWith(PHI);
+ CB.replaceAllUsesWith(PHI);
}
// Loop over all of the return instructions adding entries to the PHI node
@@ -2372,11 +2430,11 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
} else if (!Returns.empty()) {
// Otherwise, if there is exactly one return value, just replace anything
// using the return value of the call with the computed value.
- if (!TheCall->use_empty()) {
- if (TheCall == Returns[0]->getReturnValue())
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ if (!CB.use_empty()) {
+ if (&CB == Returns[0]->getReturnValue())
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
else
- TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ CB.replaceAllUsesWith(Returns[0]->getReturnValue());
}
// Update PHI nodes that use the ReturnBB to use the AfterCallBB.
@@ -2394,14 +2452,14 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
- } else if (!TheCall->use_empty()) {
+ } else if (!CB.use_empty()) {
// No returns, but something is using the return value of the call. Just
// nuke the result.
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
}
// Since we are now done with the Call/Invoke, we can delete it.
- TheCall->eraseFromParent();
+ CB.eraseFromParent();
// If we inlined any musttail calls and the original return is now
// unreachable, delete it. It can only contain a bitcast and ret.
@@ -2429,7 +2487,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// block other optimizations.
if (PHI) {
AssumptionCache *AC =
- IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
auto &DL = Caller->getParent()->getDataLayout();
if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
PHI->replaceAllUsesWith(V);
@@ -2437,5 +2495,5 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
- return true;
+ return InlineResult::success();
}
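Caller-side, the explicit result type reads as follows (hedged sketch; wrapper name invented, InlineResult itself lives in llvm/Analysis/InlineCost.h):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Utils/Cloning.h"

    static void tryInline(llvm::CallBase &CB, llvm::InlineFunctionInfo &IFI) {
      llvm::InlineResult Res = llvm::InlineFunction(CB, IFI);
      if (!Res.isSuccess())
        llvm::errs() << "not inlined: " << Res.getFailureReason() << "\n";
    }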
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index aac0b55801c46..8e339fe46d457 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -42,7 +42,7 @@ namespace {
for (Instruction &I : BB)
if (!I.hasName() && !I.getType()->isVoidTy())
- I.setName("tmp");
+ I.setName("i");
}
return true;
}
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 5746d69260d50..b1a1c564d2171 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -76,7 +76,7 @@ static bool isExitBlock(BasicBlock *BB,
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
- DominatorTree &DT, LoopInfo &LI,
+ const DominatorTree &DT, const LoopInfo &LI,
ScalarEvolution *SE) {
SmallVector<Use *, 16> UsesToRewrite;
SmallSetVector<PHINode *, 16> PHIsToRemove;
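The shape being established, for reference (names invented): a value defined inside the loop but used beyond it is routed through a single-entry PHI in the exit block, so out-of-loop users never reference the in-loop definition directly:

      exit:                                    ; preds = %loop.latch
        %v.lcssa = phi i32 [ %v, %loop.latch ]
        ; out-of-loop users now go through %v.lcssa instead of %v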
@@ -128,7 +128,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (auto *Inv = dyn_cast<InvokeInst>(I))
DomBB = Inv->getNormalDest();
- DomTreeNode *DomNode = DT.getNode(DomBB);
+ const DomTreeNode *DomNode = DT.getNode(DomBB);
SmallVector<PHINode *, 16> AddedPHIs;
SmallVector<PHINode *, 8> PostProcessPHIs;
@@ -274,7 +274,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
static void computeBlocksDominatingExits(
- Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+ Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
SmallVector<BasicBlock *, 8> BBWorklist;
@@ -318,7 +318,7 @@ static void computeBlocksDominatingExits(
}
}
-bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
+bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
@@ -383,8 +383,8 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
}
/// Process a loop nest depth first.
-bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE) {
+bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
+ const LoopInfo *LI, ScalarEvolution *SE) {
bool Changed = false;
// Recurse depth-first through inner loops.
@@ -396,7 +396,7 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
}
/// Process all loops in the function, inner-most out.
-static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
+static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT,
ScalarEvolution *SE) {
bool Changed = false;
for (auto &L : *LI)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b2d511c7c9a97..da40c342af3ac 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -40,7 +41,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -75,6 +75,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -402,15 +403,29 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
II->getIntrinsicID() == Intrinsic::launder_invariant_group)
return true;
- // Lifetime intrinsics are dead when their right-hand is undef.
- if (II->isLifetimeStartOrEnd())
- return isa<UndefValue>(II->getArgOperand(1));
+ if (II->isLifetimeStartOrEnd()) {
+ auto *Arg = II->getArgOperand(1);
+      // Lifetime intrinsics are dead when their pointer operand is undef.
+ if (isa<UndefValue>(Arg))
+ return true;
+      // If the pointer operand is an alloca, global, or argument and its
+      // only uses are lifetime intrinsics, then the intrinsics are dead.
+ if (isa<AllocaInst>(Arg) || isa<GlobalValue>(Arg) || isa<Argument>(Arg))
+ return llvm::all_of(Arg->uses(), [](Use &Use) {
+ if (IntrinsicInst *IntrinsicUse =
+ dyn_cast<IntrinsicInst>(Use.getUser()))
+ return IntrinsicUse->isLifetimeStartOrEnd();
+ return false;
+ });
+ return false;
+ }
// Assumptions are dead if their condition is trivially true. Guards on
// true are operationally no-ops. In the future we can consider more
// sophisticated tradeoffs for guards considering potential for check
// widening, but for now we keep things simple.
- if (II->getIntrinsicID() == Intrinsic::assume ||
+ if ((II->getIntrinsicID() == Intrinsic::assume &&
+ isAssumeWithEmptyBundle(*II)) ||
II->getIntrinsicID() == Intrinsic::experimental_guard) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
return !Cond->isZero();
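
The new lifetime-marker rule above can be restated as a small predicate; this is a sketch only (the helper name is hypothetical, the logic mirrors the all_of check in the hunk):

  // An alloca whose only uses are lifetime markers carries no information,
  // so the markers themselves are trivially dead.
  static bool lifetimeMarkersAreDroppable(const AllocaInst *AI) {
    return llvm::all_of(AI->uses(), [](const Use &U) {
      auto *II = dyn_cast<IntrinsicInst>(U.getUser());
      return II && II->isLifetimeStartOrEnd();
    });
  }
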
@@ -443,29 +458,49 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
if (!I || !isInstructionTriviallyDead(I, TLI))
return false;
- SmallVector<Instruction*, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
DeadInsts.push_back(I);
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
return true;
}
+bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
+ unsigned S = 0, E = DeadInsts.size(), Alive = 0;
+ for (; S != E; ++S) {
+ auto *I = cast<Instruction>(DeadInsts[S]);
+ if (!isInstructionTriviallyDead(I)) {
+ DeadInsts[S] = nullptr;
+ ++Alive;
+ }
+ }
+ if (Alive == E)
+ return false;
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+ return true;
+}
+
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
- SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
MemorySSAUpdater *MSSAU) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
- Instruction &I = *DeadInsts.pop_back_val();
- assert(I.use_empty() && "Instructions with uses are not dead.");
- assert(isInstructionTriviallyDead(&I, TLI) &&
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = cast_or_null<Instruction>(V);
+ if (!I)
+ continue;
+ assert(isInstructionTriviallyDead(I, TLI) &&
"Live instruction found in dead worklist!");
+ assert(I->use_empty() && "Instructions with uses are not dead.");
// Don't lose the debug info while deleting the instructions.
- salvageDebugInfo(I);
+ salvageDebugInfo(*I);
// Null out all of the instruction's operands to see if any operand becomes
// dead as we go.
- for (Use &OpU : I.operands()) {
+ for (Use &OpU : I->operands()) {
Value *OpV = OpU.get();
OpU.set(nullptr);
@@ -480,9 +515,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
DeadInsts.push_back(OpI);
}
if (MSSAU)
- MSSAU->removeMemoryAccess(&I);
+ MSSAU->removeMemoryAccess(I);
- I.eraseFromParent();
+ I->eraseFromParent();
}
}
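
A sketch of the calling pattern the permissive variant is meant for (assumed usage; instructions(F) comes from llvm/IR/InstIterator.h):

  // Queue candidates opportunistically; entries that are not (or are no
  // longer) trivially dead are skipped rather than tripping the assertions
  // in the strict variant.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  for (Instruction &I : instructions(F))
    if (I.use_empty() && !I.isTerminator())
      DeadInsts.push_back(&I);
  bool Changed = RecursivelyDeleteTriviallyDeadInstructionsPermissive(
      DeadInsts, /*TLI=*/nullptr, /*MSSAU=*/nullptr);
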
@@ -521,19 +556,20 @@ static bool areAllUsesEqual(Instruction *I) {
/// delete it. If that makes any of its operands trivially dead, delete them
/// too, recursively. Return true if a change was made.
bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ llvm::MemorySSAUpdater *MSSAU) {
SmallPtrSet<Instruction*, 4> Visited;
for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
I = cast<Instruction>(*I->user_begin())) {
if (I->use_empty())
- return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU);
// If we find an instruction more than once, we're on a cycle that
// won't prove fruitful.
if (!Visited.insert(I).second) {
// Break the cycle and delete the instruction and its operands.
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU);
return true;
}
}
@@ -1132,9 +1168,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
-static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
- unsigned PrefAlign,
- const DataLayout &DL) {
+static Align enforceKnownAlignment(Value *V, Align Alignment, Align PrefAlign,
+ const DataLayout &DL) {
assert(PrefAlign > Alignment);
V = V->stripPointerCasts();
@@ -1146,21 +1181,21 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
// stripPointerCasts recurses through infinite layers of bitcasts,
// while computeKnownBits is not allowed to traverse more than 6
// levels.
- Alignment = std::max(AI->getAlignment(), Alignment);
+ Alignment = std::max(AI->getAlign(), Alignment);
if (PrefAlign <= Alignment)
return Alignment;
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (DL.exceedsNaturalStackAlignment(Align(PrefAlign)))
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
return Alignment;
- AI->setAlignment(MaybeAlign(PrefAlign));
+ AI->setAlignment(PrefAlign);
return PrefAlign;
}
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// TODO: as above, this shouldn't be necessary.
- Alignment = std::max(GO->getAlignment(), Alignment);
+ Alignment = max(GO->getAlign(), Alignment);
if (PrefAlign <= Alignment)
return Alignment;
@@ -1171,18 +1206,18 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
if (!GO->canIncreaseAlignment())
return Alignment;
- GO->setAlignment(MaybeAlign(PrefAlign));
+ GO->setAlignment(PrefAlign);
return PrefAlign;
}
return Alignment;
}
-unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout &DL,
- const Instruction *CxtI,
- AssumptionCache *AC,
- const DominatorTree *DT) {
+Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
+ const DataLayout &DL,
+ const Instruction *CxtI,
+ AssumptionCache *AC,
+ const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
@@ -1191,42 +1226,22 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
// Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
- unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ);
+ // LLVM doesn't support alignments larger than (1 << MaxAlignmentExponent).
+ TrailZ = std::min(TrailZ, +Value::MaxAlignmentExponent);
- // LLVM doesn't support alignments larger than this currently.
- Align = std::min(Align, +Value::MaximumAlignment);
+ Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
- if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
+ if (PrefAlign && *PrefAlign > Alignment)
+ Alignment = enforceKnownAlignment(V, Alignment, *PrefAlign, DL);
// We don't need to make any adjustment.
- return Align;
+ return Alignment;
}
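
With the switch to the typed alignment API, a call site looks like this (assumed caller; Ptr, DL, and CxtI stand in for local values):

  // Request a preferred alignment of 16 bytes and get a typed Align back;
  // passing MaybeAlign() would query without trying to raise the alignment.
  Align Known = getOrEnforceKnownAlignment(Ptr, MaybeAlign(16), DL, CxtI);
  if (Known >= 16) {
    // Safe to emit a 16-byte-aligned access here.
  }
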
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
-/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
- Instruction *I) {
- // Since we can't guarantee that the original dbg.declare instrinsic
- // is removed by LowerDbgDeclare(), we need to make sure that we are
- // not inserting the same dbg.value intrinsic over and over.
- BasicBlock::InstListType::iterator PrevI(I);
- if (PrevI != I->getParent()->getInstList().begin()) {
- --PrevI;
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
- if (DVI->getValue() == I->getOperand(0) &&
- DVI->getVariable() == DIVar &&
- DVI->getExpression() == DIExpr)
- return true;
- }
- return false;
-}
-
/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
static bool PhiHasDebugValue(DILocalVariable *DIVar,
DIExpression *DIExpr,
@@ -1303,13 +1318,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
  // know which part) we insert a dbg.value intrinsic to indicate that we
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
- if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
return;
}
- if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1320,9 +1333,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
- if (LdStHasDebugValue(DIVar, DIExpr, LI))
- return;
-
if (!valueCoversEntireFragment(LI->getType(), DII)) {
// FIXME: If only referring to a part of the variable described by the
// dbg.declare, then we want to insert a dbg.value for the corresponding
@@ -1389,6 +1399,7 @@ static bool isStructure(AllocaInst *AI) {
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
bool llvm::LowerDbgDeclare(Function &F) {
+ bool Changed = false;
DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
SmallVector<DbgDeclareInst *, 4> Dbgs;
for (auto &FI : F)
@@ -1397,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
Dbgs.push_back(DDI);
if (Dbgs.empty())
- return false;
+ return Changed;
for (auto &I : Dbgs) {
DbgDeclareInst *DDI = I;
@@ -1450,8 +1461,14 @@ bool llvm::LowerDbgDeclare(Function &F) {
}
}
DDI->eraseFromParent();
+ Changed = true;
}
- return true;
+
+ if (Changed)
+ for (BasicBlock &BB : F)
+ RemoveRedundantDbgInstrs(&BB);
+
+ return Changed;
}
/// Propagate dbg.value intrinsics through the newly inserted PHIs.
@@ -1521,6 +1538,14 @@ TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
return Declares;
}
+TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) {
+ TinyPtrVector<DbgDeclareInst *> DDIs;
+ for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(V))
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(DVI))
+ DDIs.push_back(DDI);
+ return DDIs;
+}
+
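
A minimal sketch of the narrower helper in use (hypothetical cleanup; Addr stands in for a local Value*):

  // Drop only the dbg.declare users of Addr, leaving any dbg.addr users in
  // place; FindDbgAddrUses would have returned both kinds.
  for (DbgDeclareInst *DDI : FindDbgDeclareUses(Addr))
    DDI->eraseFromParent();
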
void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
@@ -1547,8 +1572,8 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
- Instruction *InsertBefore, DIBuilder &Builder,
- uint8_t DIExprFlags, int Offset) {
+ DIBuilder &Builder, uint8_t DIExprFlags,
+ int Offset) {
auto DbgAddrs = FindDbgAddrUses(Address);
for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
@@ -1556,23 +1581,14 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
- // Insert llvm.dbg.declare immediately before InsertBefore, and remove old
+ // Insert llvm.dbg.declare immediately before DII, and remove old
// llvm.dbg.declare.
- Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
- if (DII == InsertBefore)
- InsertBefore = InsertBefore->getNextNode();
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, DII);
DII->eraseFromParent();
}
return !DbgAddrs.empty();
}
-bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, uint8_t DIExprFlags,
- int Offset) {
- return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
- DIExprFlags, Offset);
-}
-
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
DIBuilder &Builder, int Offset) {
DebugLoc Loc = DVI->getDebugLoc();
@@ -1612,23 +1628,18 @@ static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
return MetadataAsValue::get(C, ValueAsMetadata::get(V));
}
-bool llvm::salvageDebugInfo(Instruction &I) {
+/// Salvage debug information for \p I where possible. If salvaging fails,
+/// the value is marked undef in its remaining debug users instead.
+void llvm::salvageDebugInfo(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, &I);
- if (DbgUsers.empty())
- return false;
-
- return salvageDebugInfoForDbgValues(I, DbgUsers);
-}
-
-void llvm::salvageDebugInfoOrMarkUndef(Instruction &I) {
- if (!salvageDebugInfo(I))
- replaceDbgUsesWithUndef(&I);
+ salvageDebugInfoForDbgValues(I, DbgUsers);
}
-bool llvm::salvageDebugInfoForDbgValues(
+void llvm::salvageDebugInfoForDbgValues(
Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
auto &Ctx = I.getContext();
+ bool Salvaged = false;
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
for (auto *DII : DbgUsers) {
@@ -1643,14 +1654,22 @@ bool llvm::salvageDebugInfoForDbgValues(
    // salvageDebugInfoImpl should either fail on the first element of
    // DbgUsers or on none of them.
if (!DIExpr)
- return false;
+ break;
DII->setOperand(0, wrapMD(I.getOperand(0)));
DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ Salvaged = true;
}
- return true;
+ if (Salvaged)
+ return;
+
+ for (auto *DII : DbgUsers) {
+ Value *Undef = UndefValue::get(I.getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ }
}
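
Since the undef fallback is now folded into salvaging, call sites collapse to a single call; a caller-side sketch (assumed call site):

  // Before: if (!salvageDebugInfo(*I)) replaceDbgUsesWithUndef(I);
  // After: one call either rewrites the debug users or marks them undef.
  salvageDebugInfo(*I);
  I->eraseFromParent();
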
DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
@@ -1682,13 +1701,14 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
};
if (auto *CI = dyn_cast<CastInst>(&I)) {
- // No-op casts and zexts are irrelevant for debug info.
- if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
+ // No-op casts are irrelevant for debug info.
+ if (CI->isNoopCast(DL))
return SrcDIExpr;
Type *Type = CI->getType();
- // Casts other than Trunc or SExt to scalar types cannot be salvaged.
- if (Type->isVectorTy() || (!isa<TruncInst>(&I) && !isa<SExtInst>(&I)))
+ // Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged.
+ if (Type->isVectorTy() ||
+ !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I)))
return nullptr;
Value *FromValue = CI->getOperand(0);
@@ -1805,7 +1825,7 @@ static bool rewriteDebugUsers(
if (!UndefOrSalvage.empty()) {
// Try to salvage the remaining debug users.
- salvageDebugInfoOrMarkUndef(From);
+ salvageDebugInfo(From);
Changed = true;
}
@@ -1960,11 +1980,23 @@ CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
CallInst *NewCall = CallInst::Create(II->getFunctionType(),
- II->getCalledValue(), Args, OpBundles);
+ II->getCalledOperand(), Args, OpBundles);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
NewCall->copyMetadata(*II);
+
+  // If the invoke had profile metadata, try converting it for the CallInst.
+ uint64_t TotalWeight;
+ if (NewCall->extractProfTotalWeight(TotalWeight)) {
+ // Set the total weight if it fits into i32, otherwise reset.
+ MDBuilder MDB(NewCall->getContext());
+ auto NewWeights = uint32_t(TotalWeight) != TotalWeight
+ ? nullptr
+ : MDB.createBranchWeights({uint32_t(TotalWeight)});
+ NewCall->setMetadata(LLVMContext::MD_prof, NewWeights);
+ }
+
return NewCall;
}
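
A caller-side sketch, mirroring changeToCall in this same file, of how the helper is used when an invoke's unwind edge is known dead:

  CallInst *NewCall = createCallMatchingInvoke(II);
  NewCall->takeName(II);
  NewCall->insertBefore(II);
  II->replaceAllUsesWith(NewCall);
  // Follow the call with a branch to the old normal destination, then drop
  // the invoke and its unwind edge.
  BranchInst::Create(II->getNormalDest(), II);
  II->getUnwindDest()->removePredecessor(II->getParent());
  II->eraseFromParent();
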
@@ -2011,7 +2043,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
// as of this time.
InvokeInst *II =
- InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split,
+ InvokeInst::Create(CI->getFunctionType(), CI->getCalledOperand(), Split,
UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
@@ -2042,7 +2074,7 @@ static bool markAliveBlocks(Function &F,
// canonicalizes unreachable insts into stores to null or undef.
for (Instruction &I : *BB) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
- Value *Callee = CI->getCalledValue();
+ Value *Callee = CI->getCalledOperand();
// Handle intrinsic calls.
if (Function *F = dyn_cast<Function>(Callee)) {
auto IntrinsicID = F->getIntrinsicID();
@@ -2117,7 +2149,7 @@ static bool markAliveBlocks(Function &F,
Instruction *Terminator = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
// Turn invokes that call 'nounwind' functions into ordinary calls.
- Value *Callee = II->getCalledValue();
+ Value *Callee = II->getCalledOperand();
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(BB->getParent())) ||
isa<UndefValue>(Callee)) {
@@ -2243,7 +2275,7 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
for (BasicBlock &BB : F) {
// Skip reachable basic blocks
- if (Reachable.find(&BB) != Reachable.end())
+ if (Reachable.count(&BB))
continue;
DeadBlockSet.insert(&BB);
}
@@ -2548,7 +2580,7 @@ bool llvm::callsGCLeafFunction(const CallBase *Call,
// marked as 'gc-leaf-function.' All available Libcalls are
// GC-leaf.
LibFunc LF;
- if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) {
+ if (TLI.getLibFunc(*Call, LF)) {
return TLI.has(LF);
}
@@ -2928,21 +2960,40 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
default:
return true;
case Instruction::Call:
- case Instruction::Invoke:
+ case Instruction::Invoke: {
+ const auto &CB = cast<CallBase>(*I);
+
// Can't handle inline asm. Skip it.
- if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue()))
- return false;
- // Many arithmetic intrinsics have no issue taking a
- // variable, however it's hard to distingish these from
- // specials such as @llvm.frameaddress that require a constant.
- if (isa<IntrinsicInst>(I))
+ if (CB.isInlineAsm())
return false;
// Constant bundle operands may need to retain their constant-ness for
// correctness.
- if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+ if (CB.isBundleOperand(OpIdx))
return false;
- return true;
+
+ if (OpIdx < CB.getNumArgOperands()) {
+ // Some variadic intrinsics require constants in the variadic arguments,
+ // which currently aren't markable as immarg.
+ if (isa<IntrinsicInst>(CB) &&
+ OpIdx >= CB.getFunctionType()->getNumParams()) {
+ // This is known to be OK for stackmap.
+ return CB.getIntrinsicID() == Intrinsic::experimental_stackmap;
+ }
+
+ // gcroot is a special case, since it requires a constant argument which
+ // isn't also required to be a simple ConstantInt.
+ if (CB.getIntrinsicID() == Intrinsic::gcroot)
+ return false;
+
+ // Some intrinsic operands are required to be immediates.
+ return !CB.paramHasAttr(OpIdx, Attribute::ImmArg);
+ }
+
+  // It is never allowed to replace the callee operand of an intrinsic, but
+  // it may be possible for a call.
+ return !isa<IntrinsicInst>(CB);
+ }
case Instruction::ShuffleVector:
// Shufflevector masks are constant.
return OpIdx != 2;
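
A usage sketch for the predicate (hypothetical sinking transform; C and NewPhi stand in for local values):

  // Only replace a constant operand with a PHI when the user tolerates a
  // variable there; immarg-constrained intrinsic operands now refuse.
  for (Use &U : llvm::make_early_inc_range(C->uses()))
    if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
      if (canReplaceOperandWithVariable(UserInst, U.getOperandNo()))
        U.set(NewPhi);
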
@@ -3006,3 +3057,37 @@ AllocaInst *llvm::findAllocaForValue(Value *V,
AllocaForValue[V] = Res;
return Res;
}
+
+Value *llvm::invertCondition(Value *Condition) {
+ // First: Check if it's a constant
+ if (Constant *C = dyn_cast<Constant>(Condition))
+ return ConstantExpr::getNot(C);
+
+ // Second: If the condition is already inverted, return the original value
+ Value *NotCondition;
+ if (match(Condition, m_Not(m_Value(NotCondition))))
+ return NotCondition;
+
+ BasicBlock *Parent = nullptr;
+ Instruction *Inst = dyn_cast<Instruction>(Condition);
+ if (Inst)
+ Parent = Inst->getParent();
+ else if (Argument *Arg = dyn_cast<Argument>(Condition))
+ Parent = &Arg->getParent()->getEntryBlock();
+ assert(Parent && "Unsupported condition to invert");
+
+ // Third: Check all the users for an invert
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
+ return I;
+
+ // Last option: Create a new instruction
+ auto *Inverted =
+ BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv");
+ if (Inst && !isa<PHINode>(Inst))
+ Inverted->insertAfter(Inst);
+ else
+ Inverted->insertBefore(&*Parent->getFirstInsertionPt());
+ return Inverted;
+}
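
A usage sketch for invertCondition (hypothetical branch flip): it reuses an existing inversion where one is available instead of always materializing a new not-instruction:

  auto *Br = cast<BranchInst>(BB->getTerminator());
  Value *Inv = invertCondition(Br->getCondition());
  Br->setCondition(Inv);
  Br->swapSuccessors();
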
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index c065e0269c64a..8804bba975b6a 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -46,6 +46,11 @@ using namespace llvm;
STATISTIC(NumRotated, "Number of loops rotated");
+static cl::opt<bool>
+ MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden,
+ cl::desc("Allow loop rotation multiple times in order to reach "
+ "a better latch exit"));
+
namespace {
/// A simple loop rotation transformation.
class LoopRotate {
@@ -177,14 +182,16 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
-// Look for a phi which is only used outside the loop (via a LCSSA phi)
-// in the exit from the header. This means that rotating the loop can
-// remove the phi.
-static bool shouldRotateLoopExitingLatch(Loop *L) {
+// Assuming both header and latch are exiting, look for a phi which is only
+// used outside the loop (via a LCSSA phi) in the exit from the header.
+// This means that rotating the loop can remove the phi.
+static bool profitableToRotateLoopExitingLatch(Loop *L) {
BasicBlock *Header = L->getHeader();
- BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
+ BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator());
+ assert(BI && BI->isConditional() && "need header with conditional exit");
+ BasicBlock *HeaderExit = BI->getSuccessor(0);
if (L->contains(HeaderExit))
- HeaderExit = Header->getTerminator()->getSuccessor(1);
+ HeaderExit = BI->getSuccessor(1);
for (auto &Phi : Header->phis()) {
// Look for uses of this phi in the loop/via exits other than the header.
@@ -194,7 +201,50 @@ static bool shouldRotateLoopExitingLatch(Loop *L) {
continue;
return true;
}
+ return false;
+}
+
+// Check that the latch exit is deoptimizing (i.e., very unlikely to be taken)
+// and that there is another, non-deoptimizing exit from the loop. If we
+// rotate the latch to that exit, the loop has a better chance of being fully
+// canonical.
+//
+// It can give false positives in some rare cases.
+static bool canRotateDeoptimizingLatchExit(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "need latch");
+ BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
+ // Need normal exiting latch.
+ if (!BI || !BI->isConditional())
+ return false;
+
+ BasicBlock *Exit = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ Exit = BI->getSuccessor(0);
+ // Latch exit is non-deoptimizing, no need to rotate.
+ if (!Exit->getPostdominatingDeoptimizeCall())
+ return false;
+
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ if (!Exits.empty()) {
+ // There is at least one non-deoptimizing exit.
+ //
+    // Note that BasicBlock::getPostdominatingDeoptimizeCall is not exact:
+    // it can conservatively return false for deoptimizing exits whose
+    // control flow down to the deoptimize call is sufficiently complex.
+    //
+    // That means we can report success here for a case where all exits are
+    // deoptimizing but one of them has complex enough control flow (e.g.
+    // with loops).
+    //
+    // Such cases should be very rare, and false positives from this
+    // function affect compile time only.
+ return any_of(Exits, [](const BasicBlock *BB) {
+ return !BB->getPostdominatingDeoptimizeCall();
+ });
+ }
return false;
}
@@ -208,319 +258,342 @@ static bool shouldRotateLoopExitingLatch(Loop *L) {
/// rotation. LoopRotate should be repeatable and converge to a canonical
/// form. This property is satisfied because simplifying the loop latch can only
/// happen once across multiple invocations of the LoopRotate pass.
+///
+/// If -loop-rotate-multi is enabled we can do multiple rotations in one go
+/// so to reach a suitable (non-deoptimizing) exit.
bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
- BasicBlock *OrigHeader = L->getHeader();
- BasicBlock *OrigLatch = L->getLoopLatch();
-
- BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (!BI || BI->isUnconditional())
- return false;
-
- // If the loop header is not one of the loop exiting blocks then
- // either this loop is already rotated or it is not
- // suitable for loop rotation transformations.
- if (!L->isLoopExiting(OrigHeader))
- return false;
-
- // If the loop latch already contains a branch that leaves the loop then the
- // loop is already rotated.
- if (!OrigLatch)
- return false;
-
- // Rotate if either the loop latch does *not* exit the loop, or if the loop
- // latch was just simplified. Or if we think it will be profitable.
- if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
- !shouldRotateLoopExitingLatch(L))
- return false;
-
- // Check size of original header and reject loop if it is very big or we can't
- // duplicate blocks inside it.
- {
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-
- CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
- if (Metrics.notDuplicatable) {
- LLVM_DEBUG(
- dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
- << " instructions: ";
- L->dump());
- return false;
- }
- if (Metrics.convergent) {
- LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
- "instructions: ";
- L->dump());
- return false;
+ bool Rotated = false;
+ do {
+ BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return Rotated;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return Rotated;
+
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (!OrigLatch)
+ return Rotated;
+
+    // Rotate if either the loop latch does *not* exit the loop, the loop
+    // latch was just simplified, or we think rotation will be profitable.
+    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && !IsUtilMode &&
+ !profitableToRotateLoopExitingLatch(L) &&
+ !canRotateDeoptimizingLatchExit(L))
+ return Rotated;
+
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
+ {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
+ if (Metrics.notDuplicatable) {
+ LLVM_DEBUG(
+ dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
+ << " instructions: ";
+ L->dump());
+ return Rotated;
+ }
+ if (Metrics.convergent) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
+ "instructions: ";
+ L->dump());
+ return Rotated;
+ }
+ if (Metrics.NumInsts > MaxHeaderSize) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
+ << Metrics.NumInsts
+ << " instructions, which is more than the threshold ("
+ << MaxHeaderSize << " instructions): ";
+ L->dump());
+ return Rotated;
+ }
}
- if (Metrics.NumInsts > MaxHeaderSize)
- return false;
- }
- // Now, this loop is suitable for rotation.
- BasicBlock *OrigPreheader = L->getLoopPreheader();
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!OrigPreheader || !L->hasDedicatedExits())
+ return Rotated;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated. We should also invalidate
+ // all outer loops because insertion and deletion of blocks that happens
+ // during the rotation may violate invariants related to backedge taken
+ // infos in them.
+ if (SE)
+ SE->forgetTopmostLoop(L);
+
+ LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // Find new Loop header. NewHeader is a Header's one and only successor
+ // that is inside loop. Header's other successor is outside the
+ // loop. Otherwise loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap, ValueMapMSSA;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ InsertNewValueIntoMap(ValueMap, PN,
+ PN->getIncomingValueForBlock(OrigPreheader));
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
+
+ // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
+ using DbgIntrinsicHash =
+ std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
+ auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
+ return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
+ };
+ SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
+ for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
+ I != E; ++I) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
+ DbgIntrinsics.insert(makeHash(DII));
+ else
+ break;
+ }
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!OrigPreheader || !L->hasDedicatedExits())
- return false;
+ while (I != E) {
+ Instruction *Inst = &*I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+ !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
- // Anything ScalarEvolution may know about this loop or the PHI nodes
- // in its header will soon be invalidated. We should also invalidate
- // all outer loops because insertion and deletion of blocks that happens
- // during the rotation may violate invariants related to backedge taken
- // infos in them.
- if (SE)
- SE->forgetTopmostLoop(L);
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
- LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- // Find new Loop header. NewHeader is a Header's one and only successor
- // that is inside loop. Header's other successor is outside the
- // loop. Otherwise loop is not suitable for rotation.
- BasicBlock *Exit = BI->getSuccessor(0);
- BasicBlock *NewHeader = BI->getSuccessor(1);
- if (L->contains(Exit))
- std::swap(Exit, NewHeader);
- assert(NewHeader && "Unable to determine new loop header");
- assert(L->contains(NewHeader) && !L->contains(Exit) &&
- "Unable to determine loop header and exit blocks");
-
- // This code assumes that the new header has exactly one predecessor.
- // Remove any single-entry PHI nodes in it.
- assert(NewHeader->getSinglePredecessor() &&
- "New header doesn't have one pred!");
- FoldSingleEntryPHINodes(NewHeader);
-
- // Begin by walking OrigHeader and populating ValueMap with an entry for
- // each Instruction.
- BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- ValueToValueMapTy ValueMap, ValueMapMSSA;
-
- // For PHI nodes, the value available in OldPreHeader is just the
- // incoming value from OldPreHeader.
- for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- InsertNewValueIntoMap(ValueMap, PN,
- PN->getIncomingValueForBlock(OrigPreheader));
-
- // For the rest of the instructions, either hoist to the OrigPreheader if
- // possible or create a clone in the OldPreHeader if not.
- Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
-
- // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
- using DbgIntrinsicHash =
- std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
- return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
- };
- SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
- for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
- I != E; ++I) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
- DbgIntrinsics.insert(makeHash(DII));
- else
- break;
- }
+ // Avoid inserting the same intrinsic twice.
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
+ if (DbgIntrinsics.count(makeHash(DII))) {
+ C->deleteValue();
+ continue;
+ }
- while (I != E) {
- Instruction *Inst = &*I++;
-
- // If the instruction's operands are invariant and it doesn't read or write
- // memory, then it is safe to hoist. Doing this doesn't change the order of
- // execution in the preheader, but does prevent the instruction from
- // executing in each iteration of the loop. This means it is safe to hoist
- // something that might trap, but isn't safe to hoist something that reads
- // memory (without proving that the loop doesn't write).
- if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
- !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
- !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
- Inst->moveBefore(LoopEntryBranch);
- continue;
+ // With the operands remapped, see if the instruction constant folds or is
+      // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C, SQ);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ InsertNewValueIntoMap(ValueMap, Inst, V);
+ if (!C->mayHaveSideEffects()) {
+ C->deleteValue();
+ C = nullptr;
+ }
+ } else {
+ InsertNewValueIntoMap(ValueMap, Inst, C);
+ }
+ if (C) {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(C))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ // MemorySSA cares whether the cloned instruction was inserted or not, and
+ // not whether it can be remapped to a simplified value.
+ if (MSSAU)
+ InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
+ }
}
- // Otherwise, create a duplicate of the instruction.
- Instruction *C = Inst->clone();
-
- // Eagerly remap the operands of the instruction.
- RemapInstruction(C, ValueMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ for (BasicBlock *SuccBB : successors(OrigHeader))
+ for (BasicBlock::iterator BI = SuccBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ // Update MemorySSA before the rewrite call below changes the 1:1
+ // instruction:cloned_instruction_or_value mapping.
+ if (MSSAU) {
+ InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+ ValueMapMSSA);
+ }
- // Avoid inserting the same intrinsic twice.
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
- if (DbgIntrinsics.count(makeHash(DII))) {
- C->deleteValue();
- continue;
+ SmallVector<PHINode*, 2> InsertedPHIs;
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ &InsertedPHIs);
+
+ // Attach dbg.value intrinsics to the new phis if that phi uses a value that
+ // previously had debug metadata attached. This keeps the debug info
+ // up-to-date in the loop body.
+ if (!InsertedPHIs.empty())
+ insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+ // Inform DT about changes to the CFG.
+ if (DT) {
+      // The OrigPreheader now branches to the NewHeader and Exit. Inform the
+      // DT about the edge to the OrigHeader that got removed.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
+ Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
+ DT->applyUpdates(Updates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
}
+ }
- // With the operands remapped, see if the instruction constant folds or is
- // otherwise simplifyable. This commonly occurs because the entry from PHI
- // nodes allows icmps and other instructions to fold.
- Value *V = SimplifyInstruction(C, SQ);
- if (V && LI->replacementPreservesLCSSAForm(C, V)) {
- // If so, then delete the temporary instruction and stick the folded value
- // in the map.
- InsertNewValueIntoMap(ValueMap, Inst, V);
- if (!C->mayHaveSideEffects()) {
- C->deleteValue();
- C = nullptr;
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
+ NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Split edges as necessary to preserve LoopSimplify form.
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(
+ OrigPreheader, NewHeader,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ bool SplitLatchEdge = false;
+ for (BasicBlock *ExitPred : ExitPreds) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(ExitPred);
+ if (!PredLoop || PredLoop->contains(Exit) ||
+ ExitPred->getTerminator()->isIndirectTerminator())
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == ExitPred;
+ BasicBlock *ExitSplit = SplitCriticalEdge(
+ ExitPred, Exit,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+ ExitSplit->moveBefore(Exit);
}
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
} else {
- InsertNewValueIntoMap(ValueMap, Inst, C);
- }
- if (C) {
- // Otherwise, stick the new instruction into the new block!
- C->setName(Inst->getName());
- C->insertBefore(LoopEntryBranch);
-
- if (auto *II = dyn_cast<IntrinsicInst>(C))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- // MemorySSA cares whether the cloned instruction was inserted or not, and
- // not whether it can be remapped to a simplified value.
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DT) DT->deleteEdge(OrigPreheader, Exit);
+
+ // Update MSSA too, if available.
if (MSSAU)
- InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
+ MSSAU->removeEdge(OrigPreheader, Exit);
}
- }
- // Along with all the other instructions, we just cloned OrigHeader's
- // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
- // successors by duplicating their incoming values for OrigHeader.
- for (BasicBlock *SuccBB : successors(OrigHeader))
- for (BasicBlock::iterator BI = SuccBB->begin();
- PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
-
- // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
- // OrigPreHeader's old terminator (the original branch into the loop), and
- // remove the corresponding incoming values from the PHI nodes in OrigHeader.
- LoopEntryBranch->eraseFromParent();
-
- // Update MemorySSA before the rewrite call below changes the 1:1
- // instruction:cloned_instruction_or_value mapping.
- if (MSSAU) {
- InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
- MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
- ValueMapMSSA);
- }
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
- SmallVector<PHINode*, 2> InsertedPHIs;
- // If there were any uses of instructions in the duplicated block outside the
- // loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
- &InsertedPHIs);
-
- // Attach dbg.value intrinsics to the new phis if that phi uses a value that
- // previously had debug metadata attached. This keeps the debug info
- // up-to-date in the loop body.
- if (!InsertedPHIs.empty())
- insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
-
- // NewHeader is now the header of the loop.
- L->moveToHeader(NewHeader);
- assert(L->getHeader() == NewHeader && "Latch block is our new header");
-
- // Inform DT about changes to the CFG.
- if (DT) {
- // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
- // the DT about the removed edge to the OrigHeader (that got removed).
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
- Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
- Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
- DT->applyUpdates(Updates);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
- if (MSSAU) {
- MSSAU->applyUpdates(Updates, *DT);
- if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
- }
- }
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
- // At this point, we've finished our major CFG changes. As part of cloning
- // the loop into the preheader we've simplified instructions and the
- // duplicated conditional branch may now be branching on a constant. If it is
- // branching on a constant and if that constant means that we enter the loop,
- // then we fold away the cond branch to an uncond branch. This simplifies the
- // loop in cases important for nested loops, and it also means we don't have
- // to split as many edges.
- BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
- assert(PHBI->isConditional() && "Should be clone of BI condbr!");
- if (!isa<ConstantInt>(PHBI->getCondition()) ||
- PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
- NewHeader) {
- // The conditional branch can't be folded, handle the general case.
- // Split edges as necessary to preserve LoopSimplify form.
-
- // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
- // thus is not a preheader anymore.
- // Split the edge to form a real preheader.
- BasicBlock *NewPH = SplitCriticalEdge(
- OrigPreheader, NewHeader,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
- NewPH->setName(NewHeader->getName() + ".lr.ph");
-
- // Preserve canonical loop form, which means that 'Exit' should have only
- // one predecessor. Note that Exit could be an exit block for multiple
- // nested loops, causing both of the edges to now be critical and need to
- // be split.
- SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
- bool SplitLatchEdge = false;
- for (BasicBlock *ExitPred : ExitPreds) {
- // We only need to split loop exit edges.
- Loop *PredLoop = LI->getLoopFor(ExitPred);
- if (!PredLoop || PredLoop->contains(Exit) ||
- ExitPred->getTerminator()->isIndirectTerminator())
- continue;
- SplitLatchEdge |= L->getLoopLatch() == ExitPred;
- BasicBlock *ExitSplit = SplitCriticalEdge(
- ExitPred, Exit,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
- ExitSplit->moveBefore(Exit);
- }
- assert(SplitLatchEdge &&
- "Despite splitting all preds, failed to split latch exit?");
- } else {
- // We can fold the conditional branch in the preheader, this makes things
- // simpler. The first step is to remove the extra edge to the Exit block.
- Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
- BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
- NewBI->setDebugLoc(PHBI->getDebugLoc());
- PHBI->eraseFromParent();
-
- // With our CFG finalized, update DomTree if it is available.
- if (DT) DT->deleteEdge(OrigPreheader, Exit);
-
- // Update MSSA too, if available.
- if (MSSAU)
- MSSAU->removeEdge(OrigPreheader, Exit);
- }
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
- assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
- assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+ LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ ++NumRotated;
- // Now that the CFG and DomTree are in a consistent state again, try to merge
- // the OrigHeader block into OrigLatch. This will succeed if they are
- // connected by an unconditional branch. This is just a cleanup so the
- // emitted code isn't too gross in this common case.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ Rotated = true;
+ SimplifiedLatch = false;
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+    // Check whether the new latch is a deoptimizing exit and, if so, repeat
+    // the rotation when possible. A deoptimizing latch exit is not a typical
+    // case, so we simply loop over the rotation.
+    // TODO: if this becomes a performance bottleneck, extend the rotation
+    // algorithm to handle multiple rotations in one go.
+ } while (MultiRotate && canRotateDeoptimizingLatchExit(L));
- LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
- ++NumRotated;
return true;
}
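
Since MultiRotate defaults to off, repeated rotation has to be requested explicitly. An invocation along these lines should exercise it (the flag name comes from the cl::opt above; the exact pass syntax may differ between pass managers):

  opt -loop-rotate -loop-rotate-multi -S input.ll
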
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 28f88f39a712d..a8445e94e55a0 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -230,6 +230,27 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (!Preheader)
return nullptr;
+ // Treat the presence of convergent functions conservatively. The
+ // transformation is invalid if calls to certain convergent
+ // functions (like an AMDGPU barrier) get included in the resulting
+ // inner loop. But blocks meant for the inner loop will be
+ // identified later at a point where it's too late to abort the
+ // transformation. Also, the convergent attribute is not really
+ // sufficient to express the semantics of functions that are
+ // affected by this transformation. So we choose to back off if such
+ // a function call is present until a better alternative becomes
+ // available. This is similar to the conservative treatment of
+ // convergent function calls in GVNHoist and JumpThreading.
+ for (auto BB : L->blocks()) {
+ for (auto &II : *BB) {
+ if (auto CI = dyn_cast<CallBase>(&II)) {
+ if (CI->isConvergent()) {
+ return nullptr;
+ }
+ }
+ }
+ }
+
// The header is not a landing pad; preheader insertion should ensure this.
BasicBlock *Header = L->getHeader();
assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
@@ -598,6 +619,7 @@ ReprocessLoop:
if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
+ Changed = true;
}
}
@@ -674,10 +696,8 @@ ReprocessLoop:
LI->removeBlock(ExitingBlock);
DomTreeNode *Node = DT->getNode(ExitingBlock);
- const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
- Node->getChildren();
- while (!Children.empty()) {
- DomTreeNode *Child = Children.front();
+ while (!Node->isLeaf()) {
+ DomTreeNode *Child = Node->back();
DT->changeImmediateDominator(Child, Node->getIDom());
}
DT->eraseNode(ExitingBlock);
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 4b94b371e70a9..3875c631f839b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -15,21 +15,46 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -38,6 +63,17 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <assert.h>
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+class DataLayout;
+class Value;
+} // namespace llvm
+
using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
@@ -45,8 +81,8 @@ using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
-STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
- "conditional latch (completely or otherwise)");
+STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
+ "latch (completely or otherwise)");
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
@@ -63,39 +99,6 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
#endif
);
-/// Convert the instruction operands from referencing the current values into
-/// those specified by VMap.
-void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
-
- // Unwrap arguments of dbg.value intrinsics.
- bool Wrapped = false;
- if (auto *V = dyn_cast<MetadataAsValue>(Op))
- if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) {
- Op = Unwrapped->getValue();
- Wrapped = true;
- }
-
- auto wrap = [&](Value *V) {
- auto &C = I->getContext();
- return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V;
- };
-
- ValueToValueMapTy::iterator It = VMap.find(Op);
- if (It != VMap.end())
- I->setOperand(op, wrap(It->second));
- }
-
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
- if (It != VMap.end())
- PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
- }
- }
-}
-
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
/// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -199,18 +202,20 @@ static bool isEpilogProfitable(Loop *L) {
/// simplify/dce pass of the instructions.
void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC) {
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
// Simplify any new induction variables in the partially unrolled loop.
if (SE && SimplifyIVs) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
- simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
+ simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
// Aggressively clean up dead instructions that simplifyLoopIVs already
// identified. Any remaining should be cleaned up below.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
}
// At this point, the code is well formed. We now do a quick sweep over the
@@ -277,6 +282,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC,
+ const TargetTransformInfo *TTI,
OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA, Loop **RemainderLoop) {
@@ -298,48 +304,35 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can unroll loops with a single latch or header
- // that's a conditional branch exiting the loop.
+  // The current loop unroll pass can unroll loops that have
+  // (1) a single latch; and
+  // (2a) an unconditional latch; or
+  // (2b) a conditional latch that is also an exiting block.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
- BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
- BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
- // FIXME: Support loops without conditional latch and multiple exiting blocks.
- if (!BI ||
- (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
- L->getExitingBlock() != Header))) {
- LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
- "branch in the latch or header.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
- return BI->isConditional() && BI->getSuccessor(S1) == Header &&
- !L->contains(BI->getSuccessor(S2));
- };
-
- // If we have a conditional latch, it must exit the loop.
- if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
- !CheckLatchSuccessors(1, 0)) {
+ BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  // A conditional branch that exits the loop and that, in some cases, can be
+  // optimized to an unconditional branch in the unrolled loop.
+ BranchInst *ExitingBI = nullptr;
+ bool LatchIsExiting = L->isLoopExiting(LatchBlock);
+ if (LatchIsExiting)
+ ExitingBI = LatchBI;
+ else if (BasicBlock *ExitingBlock = L->getExitingBlock())
+ ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}
-
- auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
- return HeaderBI && HeaderBI->isConditional() &&
- L->contains(HeaderBI->getSuccessor(S1)) &&
- !L->contains(HeaderBI->getSuccessor(S2));
- };
-
- // If we do not have a conditional latch, the header must exit the loop.
- if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
- !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
- LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
- return LoopUnrollResult::Unmodified;
- }
+ LLVM_DEBUG({
+ if (ExitingBI)
+ dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
+ << "\n";
+ else
+ dbgs() << " No single exiting block\n";
+ });
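Restated as a free-standing predicate (a sketch for illustration; hasUnrollableShape is not a function in this patch), the new shape check reads:

static bool hasUnrollableShape(Loop *L) {
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false; // (1) a single latch is required
  auto *LatchBI = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBI)
    return false; // the latch must end in a direct branch
  // (2a) unconditional latch, or (2b) conditional latch that exits the loop.
  return LatchBI->isUnconditional() || L->isLoopExiting(Latch);
}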
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
@@ -421,8 +414,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
bool HasConvergent = false;
for (auto &BB : L->blocks())
for (auto &I : *BB)
- if (auto CS = CallSite(&I))
- HasConvergent |= CS.isConvergent();
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ HasConvergent |= CB->isConvergent();
assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
"Unroll count must divide trip multiple if loop contains a "
"convergent operation.");
@@ -435,7 +428,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
EpilogProfitability, ULO.UnrollRemainder,
- ULO.ForgetAllSCEV, LI, SE, DT, AC,
+ ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
PreserveLCSSA, RemainderLoop)) {
if (ULO.Force)
RuntimeTripCount = false;
@@ -528,16 +521,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SE->forgetTopmostLoop(L);
}
- bool ContinueOnTrue;
- bool LatchIsExiting = BI->isConditional();
+ if (!LatchIsExiting)
+ ++NumUnrolledNotLatch;
+ Optional<bool> ContinueOnTrue = None;
BasicBlock *LoopExit = nullptr;
- if (LatchIsExiting) {
- ContinueOnTrue = L->contains(BI->getSuccessor(0));
- LoopExit = BI->getSuccessor(ContinueOnTrue);
- } else {
- NumUnrolledWithHeader++;
- ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
- LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
+ if (ExitingBI) {
+ ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
+ LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue);
}
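ContinueOnTrue is now an Optional<bool>, populated only when a single exiting branch exists; true means successor 0 of that branch stays in the loop. A small sketch of recovering both successors (locals as in the function above):

if (ContinueOnTrue.hasValue()) {
  // Successor indices are bools here: getSuccessor(true) is getSuccessor(1).
  BasicBlock *ExitBB = ExitingBI->getSuccessor(*ContinueOnTrue);   // leaves L
  BasicBlock *InLoop = ExitingBI->getSuccessor(!*ContinueOnTrue);  // stays in L
  (void)ExitBB; (void)InLoop;
}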
// For the first iteration of the loop, we should use the precloned values for
@@ -549,20 +539,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
std::vector<BasicBlock *> Headers;
- std::vector<BasicBlock *> HeaderSucc;
+ std::vector<BasicBlock *> ExitingBlocks;
+ std::vector<BasicBlock *> ExitingSucc;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
-
- if (!LatchIsExiting) {
- auto *Term = cast<BranchInst>(Header->getTerminator());
- if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
- assert(L->contains(Term->getSuccessor(0)));
- HeaderSucc.push_back(Term->getSuccessor(0));
- } else {
- assert(L->contains(Term->getSuccessor(1)));
- HeaderSucc.push_back(Term->getSuccessor(1));
- }
+ if (ExitingBI) {
+ ExitingBlocks.push_back(ExitingBI->getParent());
+ ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue)));
}
// The current on-the-fly SSA update requires blocks to be processed in
@@ -600,7 +584,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
for (unsigned It = 1; It != ULO.Count; ++It) {
- std::vector<BasicBlock*> NewBlocks;
+ SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
NewLoops[L] = L;
@@ -654,12 +638,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (*BB == LatchBlock)
Latches.push_back(New);
- // Keep track of the successor of the new header in the current iteration.
- for (auto *Pred : predecessors(*BB))
- if (Pred == Header) {
- HeaderSucc.push_back(New);
- break;
- }
+ // Keep track of the exiting block and its successor block contained in
+ // the loop for the current iteration.
+ if (ExitingBI) {
+ if (*BB == ExitingBlocks[0])
+ ExitingBlocks.push_back(New);
+ if (*BB == ExitingSucc[0])
+ ExitingSucc.push_back(New);
+ }
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
@@ -682,9 +668,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
// Remap all instructions in the most recent iteration
+ remapInstructionsInBlocks(NewBlocks, LastValueMap);
for (BasicBlock *NewBlock : NewBlocks) {
for (Instruction &I : *NewBlock) {
- ::remapInstruction(&I, LastValueMap);
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
@@ -710,18 +696,19 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
- ArrayRef<BasicBlock *> NextBlocks,
- BasicBlock *BlockInLoop,
- bool NeedConditional) {
+ auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop,
+ bool NeedConditional, Optional<bool> ContinueOnTrue,
+ bool IsDestLoopExit) {
auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
- Term->setSuccessor(!ContinueOnTrue, Dest);
+ assert(ContinueOnTrue.hasValue() &&
+ "Expecting valid ContinueOnTrue when NeedConditional is true");
+ Term->setSuccessor(!(*ContinueOnTrue), Dest);
} else {
// Remove phi operands at this loop exit
- if (Dest != LoopExit) {
+ if (!IsDestLoopExit) {
BasicBlock *BB = Src;
for (BasicBlock *Succ : successors(BB)) {
// Preserve the incoming value from BB if we are jumping to the block
@@ -738,29 +725,27 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
};
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- if (LatchIsExiting) {
- // Set up latches to branch to the new header in the unrolled iterations or
- // the loop exit for the last latch in a fully unrolled loop.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
+ // Connect latches of the unrolled iterations to the headers of the next
+ // iteration. If the latch is also the exiting block, the conditional branch
+ // may have to be preserved.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = LatchIsExiting;
- if (RuntimeTripCount && j != 0) {
+ if (LatchIsExiting) {
+ if (RuntimeTripCount && j != 0)
NeedConditional = false;
- }
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll) {
if (j == 0)
Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to keep
- // the conditional branch except the last one because the loop may exit
- // after any iteration.
+ // If using trip count upper bound to completely unroll, we need to
+ // keep the conditional branch except the last one because the loop
+ // may exit after any iteration.
assert(NeedConditional &&
"NeedCondition cannot be modified by both complete "
"unrolling and runtime unrolling");
@@ -772,16 +757,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// unconditional branch for some iterations.
NeedConditional = false;
}
-
- setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
}
- } else {
- // Setup headers to branch to their new successors in the unrolled
- // iterations.
- for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+
+ setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue,
+ Dest == LoopExit);
+ }
+
+ if (!LatchIsExiting) {
+ // If the latch is not exiting, we may be able to simplify the conditional
+ // branches in the unrolled exiting blocks.
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
// The branch destination.
unsigned j = (i + 1) % e;
- BasicBlock *Dest = HeaderSucc[i];
bool NeedConditional = true;
if (RuntimeTripCount && j != 0)
@@ -797,27 +784,19 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// unconditional branch for some iterations.
NeedConditional = false;
- setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional);
+    // Conditional branches from the non-latch exiting block have successors
+    // either in the same loop iteration or outside the loop. Such branches are
+    // already correct.
+ if (NeedConditional)
+ continue;
+ setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
+ None, false);
}
- // Set up latches to branch to the new header in the unrolled iterations or
- // the loop exit for the last latch in a fully unrolled loop.
-
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
-
- // When completely unrolling, the last latch becomes unreachable.
- if (CompletelyUnroll && j == 0)
- new UnreachableInst(Term->getContext(), Term);
- else
- // Replace the conditional branch with an unconditional one.
- BranchInst::Create(Dest, Term);
-
+ // When completely unrolling, the last latch becomes unreachable.
+ if (CompletelyUnroll) {
+ BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator());
+ new UnreachableInst(Term->getContext(), Term);
Term->eraseFromParent();
}
}
@@ -830,15 +809,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- for (auto *ChildDomNode : BBDomNode->getChildren()) {
+ for (auto *ChildDomNode : BBDomNode->children()) {
auto *ChildBB = ChildDomNode->getBlock();
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
- BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
- auto &TermBlocks = LatchIsExiting ? Latches : Headers;
- if (BB == TermBlock) {
+ if (ExitingBI && BB == ExitingBlocks[0]) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
@@ -846,13 +823,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
- // For loops exiting from the header, we limit the supported loops
- // to have a single exiting block.
- NewIDom = TermBlocks.back();
- for (BasicBlock *Iter : TermBlocks) {
- Instruction *Term = Iter->getTerminator();
+      // For loops exiting from a non-latch exiting block, we limit the
+      // branch simplification to loops with a single exiting block.
+ NewIDom = ExitingBlocks.back();
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ Instruction *Term = ExitingBlocks[i]->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom = Iter;
+ NewIDom =
+ DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
break;
}
}
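When the single exiting block is not the latch, a conditional exiting copy no longer dominates the rest of its iteration on its own, so the patch takes the nearest common dominator of that copy and its iteration's latch. A condensed sketch of the query (vectors as in the surrounding function):

BasicBlock *NewIDom = ExitingBlocks.back(); // fallback: the last copy
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
  auto *BI = dyn_cast<BranchInst>(ExitingBlocks[i]->getTerminator());
  if (BI && BI->isConditional()) {
    // First copy that kept its conditional branch: its NCD with the latch
    // dominates both the exit path and the remainder of that iteration.
    NewIDom = DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
    break;
  }
}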
@@ -897,7 +875,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
- SE, DT, AC);
+ SE, DT, AC, TTI);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index f1965934b2d71..dd628f3e7e0ca 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -11,31 +11,54 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <assert.h>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "loop-unroll-and-jam"
@@ -47,17 +70,14 @@ typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
// Partition blocks in an outer/inner loop pair into blocks before and after
// the loop
-static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
- BasicBlockSet &ForeBlocks,
- BasicBlockSet &SubLoopBlocks,
- BasicBlockSet &AftBlocks,
- DominatorTree *DT) {
+static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks,
+ BasicBlockSet &AftBlocks, DominatorTree &DT) {
+ Loop *SubLoop = L.getSubLoops()[0];
BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
- SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
- for (BasicBlock *BB : L->blocks()) {
+ for (BasicBlock *BB : L.blocks()) {
if (!SubLoop->contains(BB)) {
- if (DT->dominates(SubLoopLatch, BB))
+ if (DT.dominates(SubLoopLatch, BB))
AftBlocks.insert(BB);
else
ForeBlocks.insert(BB);
@@ -71,14 +91,44 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
if (BB == SubLoopPreHeader)
continue;
Instruction *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (!ForeBlocks.count(TI->getSuccessor(i)))
+ for (BasicBlock *Succ : successors(TI))
+ if (!ForeBlocks.count(Succ))
return false;
}
return true;
}
+/// Partition blocks in a loop nest into blocks before and after each inner
+/// loop.
+static bool partitionOuterLoopBlocks(
+ Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks,
+ DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
+ DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) {
+ JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end());
+
+ for (Loop *L : Root.getLoopsInPreorder()) {
+ if (L == &JamLoop)
+ break;
+
+ if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT))
+ return false;
+ }
+
+ return true;
+}
+
+// TODO: Remove once UnrollAndJamLoop is changed to support unrolling and
+// jamming loop nests more than two levels deep.
+static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
+ BasicBlockSet &ForeBlocks,
+ BasicBlockSet &SubLoopBlocks,
+ BasicBlockSet &AftBlocks,
+ DominatorTree *DT) {
+ SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
+ return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
+}
+
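A hypothetical driver for the generalized partitioning (illustration only; Root, JamLoop, and DT are assumed to come from the caller):

BasicBlockSet JamBlocks;
DenseMap<Loop *, BasicBlockSet> Fore, Aft;
if (partitionOuterLoopBlocks(*Root, *JamLoop, JamBlocks, Fore, Aft, *DT))
  for (Loop *CurL : Root->getLoopsInPreorder())
    if (Fore.count(CurL))
      LLVM_DEBUG(dbgs() << CurL->getName() << ": " << Fore[CurL].size()
                        << " fore, " << Aft[CurL].size() << " aft blocks\n");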
// Looks at the phi nodes in Header for values coming from Latch. For these
// instructions and all their operands calls Visit on them, keeping going for
// all the operands in AftBlocks. Returns false if Visit returns false,
@@ -169,10 +219,12 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
If EpilogueLoop is non-null, it receives the epilogue loop (if it was
necessary to create one and not fully unrolled).
*/
-LoopUnrollResult llvm::UnrollAndJamLoop(
- Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
- bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
+LoopUnrollResult
+llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
+ unsigned TripMultiple, bool UnrollRemainder,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, const TargetTransformInfo *TTI,
+ OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
// When we enter here we should have already checked that it is safe
BasicBlock *Header = L->getHeader();
@@ -198,7 +250,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
UnrollRemainder, /*ForgetAllSCEV*/ false,
- LI, SE, DT, AC, true, EpilogueLoop)) {
+ LI, SE, DT, AC, TTI, true, EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -284,8 +336,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
// Move any instructions from fore phi operands from AftBlocks into Fore.
moveHeaderPhiOperandsToForeBlocks(
- Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
- AftBlocks);
+ Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
@@ -312,32 +363,32 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
// Copy all blocks
for (unsigned It = 1; It != Count; ++It) {
- std::vector<BasicBlock *> NewBlocks;
+ SmallVector<BasicBlock *, 8> NewBlocks;
// Maps Blocks[It] -> Blocks[It-1]
DenseMap<Value *, Value *> PrevItValueMap;
+ SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+ NewLoops[L] = L;
+ NewLoops[SubLoop] = SubLoop;
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
- if (ForeBlocks.count(*BB)) {
- L->addBasicBlockToLoop(New, *LI);
+ // Tell LI about New.
+ addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+ if (ForeBlocks.count(*BB)) {
if (*BB == ForeBlocksFirst[0])
ForeBlocksFirst.push_back(New);
if (*BB == ForeBlocksLast[0])
ForeBlocksLast.push_back(New);
} else if (SubLoopBlocks.count(*BB)) {
- SubLoop->addBasicBlockToLoop(New, *LI);
-
if (*BB == SubLoopBlocksFirst[0])
SubLoopBlocksFirst.push_back(New);
if (*BB == SubLoopBlocksLast[0])
SubLoopBlocksLast.push_back(New);
} else if (AftBlocks.count(*BB)) {
- L->addBasicBlockToLoop(New, *LI);
-
if (*BB == AftBlocksFirst[0])
AftBlocksFirst.push_back(New);
if (*BB == AftBlocksLast[0])
@@ -379,9 +430,9 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
}
// Remap all instructions in the most recent iteration
+ remapInstructionsInBlocks(NewBlocks, LastValueMap);
for (BasicBlock *NewBlock : NewBlocks) {
for (Instruction &I : *NewBlock) {
- ::remapInstruction(&I, LastValueMap);
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
@@ -447,8 +498,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
// Update ForeBlocks successors and phi nodes
BranchInst *ForeTerm =
cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
- BasicBlock *Dest = SubLoopBlocksFirst[0];
- ForeTerm->setSuccessor(0, Dest);
+ assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
+ ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]);
if (CompletelyUnroll) {
while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
@@ -465,8 +516,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
// Remap ForeBlock successors from previous iteration to this
BranchInst *ForeTerm =
cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
- BasicBlock *Dest = ForeBlocksFirst[It];
- ForeTerm->setSuccessor(0, Dest);
+ assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
+ ForeTerm->setSuccessor(0, ForeBlocksFirst[It]);
}
// Subloop successors and phis
@@ -495,12 +546,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
}
// Aft blocks successors and phis
- BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
+ BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
if (CompletelyUnroll) {
- BranchInst::Create(LoopExit, Term);
- Term->eraseFromParent();
+ BranchInst::Create(LoopExit, AftTerm);
+ AftTerm->eraseFromParent();
} else {
- Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
+ AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
+ assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
+ "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
}
updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
SubLoopBlocksLast.back());
@@ -540,55 +593,48 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
- while (!MergeBlocks.empty()) {
- BasicBlock *BB = *MergeBlocks.begin();
- BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
- if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
- BasicBlock *Dest = Term->getSuccessor(0);
- BasicBlock *Fold = Dest->getUniquePredecessor();
- if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
- // Don't remove BB and add Fold as they are the same BB
- assert(Fold == BB);
- (void)Fold;
- MergeBlocks.erase(Dest);
- } else
- MergeBlocks.erase(BB);
- } else
- MergeBlocks.erase(BB);
- }
+
+ MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI);
+
// Apply updates to the DomTree.
DT = &DTU.getDomTree();
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
- simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
+ simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC,
+ TTI);
NumCompletelyUnrolledAndJammed += CompletelyUnroll;
++NumUnrolledAndJammed;
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->erase(L);
+
#ifndef NDEBUG
// We shouldn't have done anything to break loop simplify form or LCSSA.
- Loop *OuterL = L->getParentLoop();
- Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
+ Loop *OutestLoop = SubLoop->getParentLoop()
+ ? SubLoop->getParentLoop()->getParentLoop()
+ ? SubLoop->getParentLoop()->getParentLoop()
+ : SubLoop->getParentLoop()
+ : SubLoop;
+ assert(DT->verify());
+ LI->verify(*DT);
assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
if (!CompletelyUnroll)
assert(L->isLoopSimplifyForm());
assert(SubLoop->isLoopSimplifyForm());
- assert(DT->verify());
+ SE->verify();
#endif
- // Update LoopInfo if the loop is completely removed.
- if (CompletelyUnroll)
- LI->erase(L);
-
return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
: LoopUnrollResult::PartiallyUnrolled;
}
static bool getLoadsAndStores(BasicBlockSet &Blocks,
- SmallVector<Value *, 4> &MemInstr) {
+ SmallVector<Instruction *, 4> &MemInstr) {
// Scan the BBs and collect legal loads and stores.
// Returns false if non-simple loads/stores are found.
for (BasicBlock *BB : Blocks) {
@@ -609,97 +655,235 @@ static bool getLoadsAndStores(BasicBlockSet &Blocks,
return true;
}
-static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
- SmallVector<Value *, 4> &Later,
- unsigned LoopDepth, bool InnerLoop,
- DependenceInfo &DI) {
- // Use DA to check for dependencies between loads and stores that make unroll
- // and jam invalid
- for (Value *I : Earlier) {
- for (Value *J : Later) {
- Instruction *Src = cast<Instruction>(I);
- Instruction *Dst = cast<Instruction>(J);
- if (Src == Dst)
- continue;
- // Ignore Input dependencies.
- if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
- continue;
-
- // Track dependencies, and if we find them take a conservative approach
- // by allowing only = or < (not >), altough some > would be safe
- // (depending upon unroll width).
- // For the inner loop, we need to disallow any (> <) dependencies
- // FIXME: Allow > so long as distance is less than unroll width
- if (auto D = DI.depends(Src, Dst, true)) {
- assert(D->isOrdered() && "Expected an output, flow or anti dep.");
-
- if (D->isConfused()) {
- LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
+static bool preservesForwardDependence(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, Dependence *D) {
+  // UnrollLevel might carry the dependency Src --> Dst.
+  // Does a different (jammed) loop level carry it after unrolling?
+ for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
+ ++CurLoopDepth) {
+ auto JammedDir = D->getDirection(CurLoopDepth);
+ if (JammedDir == Dependence::DVEntry::LT)
+ return true;
+
+ if (JammedDir & Dependence::DVEntry::GT)
+ return false;
+ }
+
+ return true;
+}
+
+static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, Dependence *D) {
+ // UnrollLevel might carry the dependency Dst --> Src
+ for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
+ ++CurLoopDepth) {
+ auto JammedDir = D->getDirection(CurLoopDepth);
+ if (JammedDir == Dependence::DVEntry::GT)
+ return true;
+
+ if (JammedDir & Dependence::DVEntry::LT)
+ return false;
+ }
+
+ // Backward dependencies are only preserved if not interleaved.
+ return Sequentialized;
+}
+
+// Check whether it is semantically safe to interleave Src and Dst, considering
+// any potential dependency between them.
+//
+// @param UnrollLevel The level of the loop being unrolled
+// @param JamLevel    The level of the loop being jammed; if Src and Dst are on
+// different levels, the outermost common loop counts as the jammed level
+//
+// @return true if it is safe, false if there is a dependency violation.
+static bool checkDependency(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, DependenceInfo &DI) {
+ assert(UnrollLevel <= JamLevel &&
+ "Expecting JamLevel to be at least UnrollLevel");
+
+ if (Src == Dst)
+ return true;
+ // Ignore Input dependencies.
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
+ return true;
+
+ // Check whether unroll-and-jam may violate a dependency.
+  // By construction, every dependency will be lexicographically non-negative
+  // (if it were negative, it would violate the current execution order), e.g.
+  //   (0,0,>,*,*)
+  // Unroll-and-jam can move two executions that were in GT order at the chosen
+  // unroll level into the same iteration of that level. That is, a GT dependence
+  // becomes a GE dependence (or EQ, if we fully unrolled the loop) there:
+ // (0,0,>=,*,*)
+ // Now, the dependency is not necessarily non-negative anymore, i.e.
+ // unroll-and-jam may violate correctness.
+ std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true);
+ if (!D)
+ return true;
+ assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
+ return false;
+ }
+
+ // If outer levels (levels enclosing the loop being unroll-and-jammed) have a
+ // non-equal direction, then the locations accessed in the inner levels cannot
+  // overlap in memory. We assume the indexes never overlap into neighboring
+ // dimensions.
+ for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
+ if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ))
+ return true;
+
+ auto UnrollDirection = D->getDirection(UnrollLevel);
+
+ // If the distance carried by the unrolled loop is 0, then after unrolling
+ // that distance will become non-zero resulting in non-overlapping accesses in
+ // the inner loops.
+ if (UnrollDirection == Dependence::DVEntry::EQ)
+ return true;
+
+ if (UnrollDirection & Dependence::DVEntry::LT &&
+ !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel,
+ Sequentialized, D.get()))
+ return false;
+
+ if (UnrollDirection & Dependence::DVEntry::GT &&
+ !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel,
+ Sequentialized, D.get()))
+ return false;
+
+ return true;
+}
+
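To make the direction-vector rules concrete, a hand-worked example (the vectors are assumed, not computed; UnrollLevel = 2 and JamLevel = 3 in a three-deep nest):

// (=, =, *) : getDirection(2) == EQ; unrolling turns the zero level-2
//             distance into distinct instruction copies -> safe here.
// (=, <, <) : forward at level 2 and still forward at level 3; the
//             dependence stays forward once level-2 iterations are fused.
// (=, <, >) : forward at level 2 but backward at level 3; after jamming,
//             the level-3 '>' would run backwards -> reject.
static bool safeToJam(Instruction *Src, Instruction *Dst, DependenceInfo &DI) {
  return checkDependency(Src, Dst, /*UnrollLevel=*/2, /*JamLevel=*/3,
                         /*Sequentialized=*/false, DI);
}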
+static bool
+checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
+ const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
+ const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap,
+ DependenceInfo &DI, LoopInfo &LI) {
+ SmallVector<BasicBlockSet, 8> AllBlocks;
+ for (Loop *L : Root.getLoopsInPreorder())
+ if (ForeBlocksMap.find(L) != ForeBlocksMap.end())
+ AllBlocks.push_back(ForeBlocksMap.lookup(L));
+ AllBlocks.push_back(SubLoopBlocks);
+ for (Loop *L : Root.getLoopsInPreorder())
+ if (AftBlocksMap.find(L) != AftBlocksMap.end())
+ AllBlocks.push_back(AftBlocksMap.lookup(L));
+
+ unsigned LoopDepth = Root.getLoopDepth();
+ SmallVector<Instruction *, 4> EarlierLoadsAndStores;
+ SmallVector<Instruction *, 4> CurrentLoadsAndStores;
+ for (BasicBlockSet &Blocks : AllBlocks) {
+ CurrentLoadsAndStores.clear();
+ if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores))
+ return false;
+
+ Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent());
+ unsigned CurLoopDepth = CurLoop->getLoopDepth();
+
+ for (auto *Earlier : EarlierLoadsAndStores) {
+ Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent());
+ unsigned EarlierDepth = EarlierLoop->getLoopDepth();
+ unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
+ for (auto *Later : CurrentLoadsAndStores) {
+ if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false,
+ DI))
return false;
- }
- if (!InnerLoop) {
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
- LLVM_DEBUG(dbgs() << " > dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
- return false;
- }
- } else {
- assert(LoopDepth + 1 <= D->getLevels());
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
- D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
- LLVM_DEBUG(dbgs() << " < > dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
- return false;
- }
- }
}
}
+
+ size_t NumInsts = CurrentLoadsAndStores.size();
+ for (size_t I = 0; I < NumInsts; ++I) {
+ for (size_t J = I; J < NumInsts; ++J) {
+ if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J],
+ LoopDepth, CurLoopDepth, true, DI))
+ return false;
+ }
+ }
+
+ EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
+ CurrentLoadsAndStores.end());
}
return true;
}
-static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
- BasicBlockSet &SubLoopBlocks,
- BasicBlockSet &AftBlocks, DependenceInfo &DI) {
- // Get all loads/store pairs for each blocks
- SmallVector<Value *, 4> ForeMemInstr;
- SmallVector<Value *, 4> SubLoopMemInstr;
- SmallVector<Value *, 4> AftMemInstr;
- if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
- !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
- !getLoadsAndStores(AftBlocks, AftMemInstr))
+static bool isEligibleLoopForm(const Loop &Root) {
+ // Root must have a child.
+ if (Root.getSubLoops().size() != 1)
return false;
- // Check for dependencies between any blocks that may change order
- unsigned LoopDepth = L->getLoopDepth();
- return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
- DI) &&
- checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
- checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
- DI) &&
- checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
- DI);
+ const Loop *L = &Root;
+ do {
+ // All loops in Root need to be in simplify and rotated form.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ if (!L->isRotatedForm())
+ return false;
+
+ if (L->getHeader()->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
+ return false;
+ }
+
+ unsigned SubLoopsSize = L->getSubLoops().size();
+ if (SubLoopsSize == 0)
+ return true;
+
+ // Only one child is allowed.
+ if (SubLoopsSize != 1)
+ return false;
+
+ L = L->getSubLoops()[0];
+ } while (L);
+
+ return true;
+}
+
+static Loop *getInnerMostLoop(Loop *L) {
+ while (!L->getSubLoops().empty())
+ L = L->getSubLoops()[0];
+ return L;
}
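Eligible nests thus form a simple chain, one subloop per level down to the innermost. A sketch of the accepted shape (hypothetical loops):

//   for (i)        // Root: simplify form, rotated, exactly one subloop
//     for (j)      //   likewise, exactly one subloop
//       for (k)    //     no subloops: this is the loop that gets jammed
Loop *JamLoop = getInnerMostLoop(L);
assert(JamLoop->getSubLoops().empty() && "jam loop must be innermost");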
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
- DependenceInfo &DI) {
+ DependenceInfo &DI, LoopInfo &LI) {
+ if (!isEligibleLoopForm(*L)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n");
+ return false;
+ }
+
/* We currently handle outer loops like this:
|
- ForeFirst <----\ }
- Blocks | } ForeBlocks
- ForeLast | }
- | |
- SubLoopFirst <\ | }
- Blocks | | } SubLoopBlocks
- SubLoopLast -/ | }
- | |
- AftFirst | }
- Blocks | } AftBlocks
- AftLast ------/ }
+ ForeFirst <------\ }
+ Blocks | } ForeBlocks of L
+ ForeLast | }
+ | |
+ ... |
+ | |
+ ForeFirst <----\ | }
+     Blocks         |   |   } ForeBlocks of an inner loop of L
+ ForeLast | | }
+ | | |
+ JamLoopFirst <\ | | }
+ Blocks | | | } JamLoopBlocks of the innermost loop
+ JamLoopLast -/ | | }
+ | | |
+ AftFirst | | }
+     Blocks         |   |   } AftBlocks of an inner loop of L
+ AftLast ------/ | }
+ | |
+ ... |
+ | |
+ AftFirst | }
+ Blocks | } AftBlocks of L
+ AftLast --------/ }
|
There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
@@ -709,14 +893,16 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
  things further in the profitability checks of the unroll and jam pass.
Because of the way we rearrange basic blocks, we also require that
- the Fore blocks on all unrolled iterations are safe to move before the
- SubLoop blocks of all iterations. So we require that the phi node looping
- operands of ForeHeader can be moved to at least the end of ForeEnd, so that
- we can arrange cloned Fore Blocks before the subloop and match up Phi's
- correctly.
+  the Fore blocks of L on all unrolled iterations are safe to move before the
+  blocks of L's direct child on all iterations. So we require that the
+ phi node looping operands of ForeHeader can be moved to at least the end of
+ ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and
+ match up Phi's correctly.
- i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
- It needs to be safe to tranform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
+ i.e. The old order of blocks used to be
+ (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2.
+ It needs to be safe to transform this to
+ (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2.
There are then a number of checks along the lines of no calls, no
exceptions, inner loop IV is consistent, etc. Note that for loops requiring
@@ -724,35 +910,13 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
UnrollAndJamLoop if the trip count cannot be easily calculated.
*/
- if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
- return false;
- Loop *SubLoop = L->getSubLoops()[0];
- if (!SubLoop->isLoopSimplifyForm())
- return false;
-
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Exit = L->getExitingBlock();
- BasicBlock *SubLoopHeader = SubLoop->getHeader();
- BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
- BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
-
- if (Latch != Exit)
- return false;
- if (SubLoopLatch != SubLoopExit)
- return false;
-
- if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
- return false;
- }
-
// Split blocks into Fore/SubLoop/Aft based on dominators
+ Loop *JamLoop = getInnerMostLoop(L);
BasicBlockSet SubLoopBlocks;
- BasicBlockSet ForeBlocks;
- BasicBlockSet AftBlocks;
- if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
- AftBlocks, &DT)) {
+ DenseMap<Loop *, BasicBlockSet> ForeBlocksMap;
+ DenseMap<Loop *, BasicBlockSet> AftBlocksMap;
+ if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap,
+ AftBlocksMap, DT)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
return false;
}
@@ -760,7 +924,7 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// Aft blocks may need to move instructions to fore blocks, which becomes more
// difficult if there are multiple (potentially conditionally executed)
// blocks. For now we just exclude loops with multiple aft blocks.
- if (AftBlocks.size() != 1) {
+ if (AftBlocksMap[L].size() != 1) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
"multiple blocks after the loop\n");
return false;
@@ -768,7 +932,9 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// Check inner loop backedge count is consistent on all iterations of the
// outer loop
- if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
+ if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) {
+ return !hasIterationCountInvariantInParent(SubLoop, SE);
+ })) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
"not consistent on each iteration\n");
return false;
@@ -789,6 +955,10 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// ForeBlock phi operands before the subloop
// Make sure we can move all instructions we need to before the subloop
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlockSet AftBlocks = AftBlocksMap[L];
+ Loop *SubLoop = L->getSubLoops()[0];
if (!processHeaderPhiOperands(
Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
if (SubLoop->contains(I->getParent()))
@@ -814,7 +984,8 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// Check for memory dependencies which prohibit the unrolling we are doing.
// Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
// there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
+ if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI,
+ LI)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
return false;
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 7a168ff6f32b0..c653aacbee6cc 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -262,10 +262,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// iteration. See if that makes !Pred become unknown again.
if (ICmpInst::isEquality(Pred) &&
!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
- RightSCEV)) {
- assert(!SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
- SE.isKnownPredicate(Pred, NextIterVal, RightSCEV) &&
- "Expected Pred to go from known to unknown.");
+ RightSCEV) &&
+ !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
+ SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
if (!CanPeelOneMoreIteration())
continue; // Need to peel one more iteration, but can't. Give up.
PeelOneMoreIteration(); // Great!
@@ -280,17 +279,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
- // Save the UP.PeelCount value set by the target in
- // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
- unsigned TargetPeelCount = UP.PeelCount;
- UP.PeelCount = 0;
+ // Save the PP.PeelCount value set by the target in
+ // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = PP.PeelCount;
+ PP.PeelCount = 0;
if (!canPeel(L))
return;
- // Only try to peel innermost loops.
- if (!L->empty())
+ // Only try to peel innermost loops by default.
+  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
+ // or by the flag -unroll-allow-loop-nests-peeling.
+ if (!PP.AllowLoopNestsPeeling && !L->empty())
return;
// If the user provided a peel count, use that.
@@ -298,13 +300,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (UserPeelCount) {
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
- UP.PeelCount = UnrollForcePeelCount;
- UP.PeelProfiledIterations = true;
+ PP.PeelCount = UnrollForcePeelCount;
+ PP.PeelProfiledIterations = true;
return;
}
// Skip peeling if it's disabled.
- if (!UP.AllowPeeling)
+ if (!PP.AllowPeeling)
return;
unsigned AlreadyPeeled = 0;
@@ -353,8 +355,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
<< " iteration(s) to turn"
<< " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- UP.PeelProfiledIterations = false;
+ PP.PeelCount = DesiredPeelCount;
+ PP.PeelProfiledIterations = false;
return;
}
}
@@ -366,7 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
// Do not apply profile base peeling if it is disabled.
- if (!UP.PeelProfiledIterations)
+ if (!PP.PeelProfiledIterations)
return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
@@ -386,7 +388,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
- UP.PeelCount = *PeelCount;
+ PP.PeelCount = *PeelCount;
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
@@ -508,7 +510,10 @@ static void cloneLoopBlocks(
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
NewBlocks.push_back(NewBB);
- if (ParentLoop)
+ // If an original block is an immediate child of the loop L, its copy
+ // is a child of a ParentLoop after peeling. If a block is a child of
+    // is a child of the ParentLoop after peeling. If a block is a child of
+ if (ParentLoop && LI->getLoopFor(*BB) == L)
ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
@@ -525,6 +530,12 @@ static void cloneLoopBlocks(
}
}
+ // Recursively create the new Loop objects for nested loops, if any,
+ // to preserve LoopInfo.
+ for (Loop *ChildLoop : *L) {
+ cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
+ }
+
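Taken together, the two fixes keep LoopInfo exact across peeling: blocks directly in L are registered with ParentLoop, while blocks of nested loops receive fresh Loop objects. A condensed sketch (locals as in cloneLoopBlocks):

// Per cloned block: only immediate children of L join ParentLoop here.
if (ParentLoop && LI->getLoopFor(*BB) == L)
  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
// Afterwards, one pass rebuilds the nested Loop objects over the clones.
for (Loop *ChildLoop : *L)
  cloneLoop(ChildLoop, ParentLoop, VMap, LI, /*LPM=*/nullptr);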
// Hook-up the control flow for the newly inserted blocks.
// The new header is hooked up directly to the "top", which is either
// the original loop preheader (for the first iteration) or the previous
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index ddb7479924bdc..2515b1676cb99 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
@@ -37,6 +36,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -543,13 +543,11 @@ static bool canProfitablyUnrollMultiExitLoop(
/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
/// EpilExit:
-bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount,
- bool UseEpilogRemainder,
- bool UnrollRemainder, bool ForgetAllSCEV,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA, Loop **ResultLoop) {
+bool llvm::UnrollRuntimeLoopRemainder(
+ Loop *L, unsigned Count, bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -637,7 +635,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
- Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
+ Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
+ TTI, PreHeaderBR)) {
LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
return false;
}
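The expansion guard now carries an explicit budget and TTI. A sketch of the guarded-expansion pattern, assuming the surrounding locals (the expandCodeFor call is illustrative and not part of this hunk):

SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
    Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
                                 TTI, PreHeaderBR))
  return false; // materializing the trip count would cost too much code
Value *TripCountV =
    Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR);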
@@ -849,7 +848,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// dominator of the exit blocks.
for (auto *BB : L->blocks()) {
auto *DomNodeBB = DT->getNode(BB);
- for (auto *DomChild : DomNodeBB->getChildren()) {
+ for (auto *DomChild : DomNodeBB->children()) {
auto *DomChildBB = DomChild->getBlock();
if (!L->contains(LI->getLoopFor(DomChildBB)))
ChildrenToUpdate.push_back(DomChildBB);
@@ -949,7 +948,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
/*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
/*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
/*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
+ LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c4c40189fda46..43363736684ee 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -11,12 +11,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -31,7 +38,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
@@ -39,10 +48,17 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool> ForceReductionIntrinsic(
+ "force-reduction-intrinsics", cl::Hidden,
+ cl::desc("Force creating reduction intrinsics for testing."),
+ cl::init(false));
+
#define DEBUG_TYPE "loop-utils"
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
@@ -496,20 +512,24 @@ llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
AddRegionToWorklist(N);
- for (size_t I = 0; I < Worklist.size(); I++)
- for (DomTreeNode *Child : Worklist[I]->getChildren())
+ for (size_t I = 0; I < Worklist.size(); I++) {
+ for (DomTreeNode *Child : Worklist[I]->children())
AddRegionToWorklist(Child);
+ }
return Worklist;
}
-void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
- ScalarEvolution *SE = nullptr,
- LoopInfo *LI = nullptr) {
+void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
+ LoopInfo *LI, MemorySSA *MSSA) {
assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
auto *Preheader = L->getLoopPreheader();
assert(Preheader && "Preheader should exist!");
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
//
@@ -582,18 +602,33 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
"Should have exactly one value and that's from the preheader!");
}
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (DT) {
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+ }
+
// Disconnect the loop body by branching directly to its exit.
Builder.SetInsertPoint(Preheader->getTerminator());
Builder.CreateBr(ExitBlock);
// Remove the old branch.
Preheader->getTerminator()->eraseFromParent();
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
- // Update the dominator tree by informing it about the new edge from the
- // preheader to the exit and the removed edge.
- DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
- {DominatorTree::Delete, Preheader, L->getHeader()}});
+ DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
+ *DT);
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
+ L->block_end());
+ MSSAU->removeBlocks(DeadBlockSet);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
}
// Use a map to unique and a vector to guarantee deterministic ordering.
@@ -654,6 +689,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
for (auto *Block : L->blocks())
Block->dropAllReferences();
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
if (LI) {
// Erase the instructions and the blocks without having to worry
// about ordering because we already dropped the references.
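The DomTreeUpdater construction moves above the branch rewrite so the Preheader->ExitBlock insertion is applied, and mirrored into MemorySSA, before the Preheader->Header edge is deleted. A condensed sketch of the ordering (assuming DT is non-null and the function's locals; MSSA may be null):

std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
if (MSSAU)
  MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, *DT);
// ... rewrite the preheader terminator to branch to ExitBlock ...
DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
if (MSSAU)
  MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}, *DT);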
@@ -676,11 +714,11 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// its parent. While removeLoop/removeChildLoop remove the given loop but
// not relink its subloops, which is what we want.
if (Loop *ParentLoop = L->getParentLoop()) {
- Loop::iterator I = find(ParentLoop->begin(), ParentLoop->end(), L);
+ Loop::iterator I = find(*ParentLoop, L);
assert(I != ParentLoop->end() && "Couldn't find loop");
ParentLoop->removeChildLoop(I);
} else {
- Loop::iterator I = find(LI->begin(), LI->end(), L);
+ Loop::iterator I = find(*LI, L);
assert(I != LI->end() && "Couldn't find loop");
LI->removeLoop(I);
}
@@ -688,17 +726,17 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
}
}
-Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
- // Support loops with an exiting latch and other existing exists only
- // deoptimize.
-
- // Get the branch weights for the loop's backedge.
+/// Checks if \p L has a single exit through its latch block, except possibly
+/// for "deoptimizing" exits. Returns the branch instruction terminating the
+/// loop latch if the check succeeds, nullptr otherwise.
+static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
- return None;
+ return nullptr;
+
BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
- return None;
+ return nullptr;
assert((LatchBR->getSuccessor(0) == L->getHeader() ||
LatchBR->getSuccessor(1) == L->getHeader()) &&
@@ -709,24 +747,73 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
if (any_of(ExitBlocks, [](const BasicBlock *EB) {
return !EB->getTerminatingDeoptimizeCall();
}))
+ return nullptr;
+
+ return LatchBR;
+}
+
+Optional<unsigned>
+llvm::getLoopEstimatedTripCount(Loop *L,
+ unsigned *EstimatedLoopInvocationWeight) {
+  // Only support loops with an exiting latch; all other exits must
+  // deoptimize.
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
return None;
// To estimate the number of times the loop body was executed, we want to
// know the number of times the backedge was taken, vs. the number of times
// we exited the loop.
uint64_t BackedgeTakenWeight, LatchExitWeight;
- if (!LatchBR->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight))
+ if (!LatchBranch->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight))
return None;
- if (LatchBR->getSuccessor(0) != L->getHeader())
+ if (LatchBranch->getSuccessor(0) != L->getHeader())
+ std::swap(BackedgeTakenWeight, LatchExitWeight);
+
+ if (!LatchExitWeight)
+ return None;
+
+ if (EstimatedLoopInvocationWeight)
+ *EstimatedLoopInvocationWeight = LatchExitWeight;
+
+ // The estimated backedge-taken count is the ratio of the backedge-taken
+ // weight to the weight of the edge exiting the loop, rounded to nearest.
+ uint64_t BackedgeTakenCount =
+ llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight);
+ // Estimated trip count is one plus estimated backedge taken count.
+ return BackedgeTakenCount + 1;
+}
+
+bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
+ unsigned EstimatedLoopInvocationWeight) {
+ // Support loops with an exiting latch, where all other exits are
+ // deoptimizing exits.
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return false;
+
+ // Calculate taken and exit weights.
+ unsigned LatchExitWeight = 0;
+ unsigned BackedgeTakenWeight = 0;
+
+ if (EstimatedTripCount > 0) {
+ LatchExitWeight = EstimatedLoopInvocationWeight;
+ BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
+ }
+
+ // Swap if the backedge is taken when the condition is "false".
+ if (LatchBranch->getSuccessor(0) != L->getHeader())
std::swap(BackedgeTakenWeight, LatchExitWeight);
- if (!BackedgeTakenWeight || !LatchExitWeight)
- return 0;
+ MDBuilder MDB(LatchBranch->getContext());
- // Divide the count of the backedge by the count of the edge exiting the loop,
- // rounding to nearest.
- return llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight);
+ // Set/Update profile metadata.
+ LatchBranch->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
+
+ return true;
}
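For intuition, the weight/trip-count mapping implemented by the two functions above can be exercised in isolation. The following standalone sketch is an illustration only, not part of the change; it uses plain integer arithmetic, and its divideNearest mirrors llvm::divideNearest from Support/MathExtras.h:

#include <cassert>
#include <cstdint>

// Round-to-nearest unsigned division, as llvm::divideNearest computes it.
static uint64_t divideNearest(uint64_t Num, uint64_t Den) {
  return (Num + Den / 2) / Den;
}

int main() {
  // A latch carrying branch weights {backedge: 99, exit: 1} describes a
  // loop that runs about 100 iterations per invocation.
  uint64_t BackedgeTakenWeight = 99, LatchExitWeight = 1;
  uint64_t TripCount = divideNearest(BackedgeTakenWeight, LatchExitWeight) + 1;
  // setLoopEstimatedTripCount inverts the mapping: from a trip count and
  // the invocation weight it re-derives the backedge weight.
  uint64_t NewBackedgeWeight = (TripCount - 1) * LatchExitWeight;
  assert(TripCount == 100 && NewBackedgeWeight == 99);
  return 0;
}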
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
@@ -751,7 +838,7 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder,
RecurrenceDescriptor::MinMaxRecurrenceKind RK,
Value *Left, Value *Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;
@@ -780,29 +867,22 @@ Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
// We only match FP sequences that are 'fast', so we can unconditionally
// set it on any generated instructions.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ IRBuilderBase::FastMathFlagGuard FMFG(Builder);
FastMathFlags FMF;
FMF.setFast();
Builder.setFastMathFlags(FMF);
-
- Value *Cmp;
- if (RK == RecurrenceDescriptor::MRK_FloatMin ||
- RK == RecurrenceDescriptor::MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
+ Value *Cmp = Builder.CreateCmp(P, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
// Helper to generate an ordered reduction.
Value *
-llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
+llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
ArrayRef<Value *> RedOps) {
- unsigned VF = Src->getType()->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// Extract and apply reduction ops in ascending order:
// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
@@ -829,29 +909,27 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
// Helper to generate a log2 shuffle reduction.
Value *
-llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
ArrayRef<Value *> RedOps) {
- unsigned VF = Src->getType()->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
// round.
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = Src;
- SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
+ SmallVector<int, 32> ShuffleMask(VF);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i / 2; ++j)
- ShuffleMask[j] = Builder.getInt32(i / 2 + j);
+ ShuffleMask[j] = i / 2 + j;
// Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()),
- ConstantVector::get(ShuffleMask), "rdx.shuf");
+ TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// The builder propagates its fast-math-flags setting.
@@ -864,6 +942,11 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
+
+ // We may compute the reassociated scalar ops in a way that does not
+ // preserve nsw/nuw etc. Conservatively, drop those flags.
+ if (auto *ReductionInst = dyn_cast<Instruction>(TmpVec))
+ ReductionInst->dropPoisonGeneratingFlags();
}
// The result is in the first element of the vector.
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
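The log2 shuffle schedule is easiest to see at a fixed width. The sketch below is an illustration only (VF = 8 and integer addition are assumptions; plain array elements stand in for vector lanes): each round folds the upper half of the vector onto the lower half, leaving the result in lane 0.

#include <cassert>

int main() {
  int Lanes[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  // Mirrors the emitted IR: for i = VF; i != 1; i >>= 1, lane j is combined
  // with lane i/2 + j (the "rdx.shuf" shuffle followed by the binary op).
  for (unsigned i = 8; i != 1; i >>= 1)
    for (unsigned j = 0; j != i / 2; ++j)
      Lanes[j] += Lanes[i / 2 + j];
  assert(Lanes[0] == 36); // 1 + 2 + ... + 8, extracted from lane 0
  return 0;
}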
@@ -872,10 +955,10 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
Value *llvm::createSimpleTargetReduction(
- IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
+ IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
Value *Src, TargetTransformInfo::ReductionFlags Flags,
ArrayRef<Value *> RedOps) {
- assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
+ auto *SrcVTy = cast<VectorType>(Src->getType());
std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
@@ -900,13 +983,13 @@ Value *llvm::createSimpleTargetReduction(
case Instruction::FAdd:
BuildFunc = [&]() {
auto Rdx = Builder.CreateFAddReduce(
- Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
+ Constant::getNullValue(SrcVTy->getElementType()), Src);
return Rdx;
};
break;
case Instruction::FMul:
BuildFunc = [&]() {
- Type *Ty = Src->getType()->getVectorElementType();
+ Type *Ty = SrcVTy->getElementType();
auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
return Rdx;
};
@@ -937,13 +1020,14 @@ Value *llvm::createSimpleTargetReduction(
llvm_unreachable("Unhandled opcode");
break;
}
- if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
+ if (ForceReductionIntrinsic ||
+ TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
return BuildFunc();
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}
/// Create a vector reduction using a given recurrence descriptor.
-Value *llvm::createTargetReduction(IRBuilder<> &B,
+Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN) {
@@ -955,7 +1039,7 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
- IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+ IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
switch (RecKind) {
@@ -1042,3 +1126,586 @@ bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
SE.isLoopEntryGuardedByCond(L, Predicate, S,
SE.getConstant(Max));
}
+
+//===----------------------------------------------------------------------===//
+// rewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+// Return true if the SCEV expansion generated by the rewriter can replace the
+// original value. SCEV guarantees that it produces the same value, but the way
+// it is produced may be illegal IR. Ideally, this function will only be
+// called for verification.
+static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (auto *GEP = dyn_cast<GEPOperator>(FromVal))
+ FromPtr = GEP->getPointerOperand();
+
+ if (auto *GEP = dyn_cast<GEPOperator>(ToVal))
+ ToPtr = GEP->getPointerOperand();
+
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
+static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *, 8> WorkList;
+ Visited.insert(I);
+ WorkList.push_back(I);
+ while (!WorkList.empty()) {
+ const Instruction *Curr = WorkList.pop_back_val();
+ // This use is outside the loop, nothing to do.
+ if (!L->contains(Curr))
+ continue;
+ // Do we assume it is a "hard" use which will not be eliminated easily?
+ if (Curr->mayHaveSideEffects())
+ return true;
+ // Otherwise, add all its users to worklist.
+ for (auto U : Curr->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (Visited.insert(UI).second)
+ WorkList.push_back(UI);
+ }
+ }
+ return false;
+}
+
+// Collect information about PHI nodes which can be transformed in
+// rewriteLoopExitValues.
+struct RewritePhi {
+ PHINode *PN; // For which PHI node is this replacement?
+ unsigned Ith; // For which incoming value?
+ const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
+ Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
+ bool HighCost; // Is this expansion a high-cost?
+
+ Value *Expansion = nullptr;
+ bool ValidRewrite = false;
+
+ RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
+ bool H)
+ : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
+ HighCost(H) {}
+};
+
+// Check whether it is possible to delete the loop after rewriting exit
+// value. If it is possible, ignore ReplaceExitValue and do rewriting
+// aggressively.
+static bool canLoopBeDeleted(Loop *L,
+ SmallVector<RewritePhi, 8> &RewritePhiSet) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ // If there is no preheader, the loop will not be deleted.
+ if (!Preheader)
+ return false;
+
+ // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
+ // We obviate multiple ExitingBlocks case for simplicity.
+ // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
+ // after exit value rewriting, we can enhance the logic here.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
+ return false;
+
+ BasicBlock *ExitBlock = ExitBlocks[0];
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
+
+ // If the Incoming value of P is found in RewritePhiSet, we know it
+ // could be rewritten to use a loop invariant value in transformation
+ // phase later. Skip it in the loop invariant check below.
+ bool found = false;
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ if (!Phi.ValidRewrite)
+ continue;
+ unsigned i = Phi.Ith;
+ if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
+ found = true;
+ break;
+ }
+ }
+
+ Instruction *I;
+ if (!found && (I = dyn_cast<Instruction>(Incoming)))
+ if (!L->hasLoopInvariantOperands(I))
+ return false;
+
+ ++BI;
+ }
+
+ for (auto *BB : L->blocks())
+ if (llvm::any_of(*BB, [](Instruction &I) {
+ return I.mayHaveSideEffects();
+ }))
+ return false;
+
+ return true;
+}
+
+int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
+ ScalarEvolution *SE,
+ const TargetTransformInfo *TTI,
+ SCEVExpander &Rewriter, DominatorTree *DT,
+ ReplaceExitVal ReplaceExitValue,
+ SmallVector<WeakTrackingVH, 16> &DeadInsts) {
+ // Check a pre-condition.
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Indvars did not preserve LCSSA!");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ SmallVector<RewritePhi, 8> RewritePhiSet;
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (BasicBlock *ExitBB : ExitBlocks) {
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ if (!SE->isSCEVable(PN->getType()))
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible. We prefer to start with
+ // expressions which are true for all exits (so as to maximize
+ // expression reuse by the SCEVExpander), but resort to per-exit
+ // evaluation if that fails.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE)) {
+ // TODO: This should probably be sunk into SCEV in some way; maybe a
+ // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
+ // most SCEV expressions and other recurrence types (e.g. shift
+ // recurrences). Is there existing code we can reuse?
+ const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
+ if (AddRec->getLoop() == L)
+ ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
+ continue;
+ }
+
+ // Computing the value outside of the loop brings no benefit if it is
+ // definitely used inside the loop in a way which can not be optimized
+ // away. Avoid doing so unless we know we have a value which computes
+ // the ExitValue already. TODO: This should be merged into SCEV
+ // expander to leverage its knowledge of existing expressions.
+ if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
+ !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ continue;
+
+ // Check if expansions of this SCEV would count as being high cost.
+ bool HighCost = Rewriter.isHighCostExpansion(
+ ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
+
+ // Note that we must not perform expansions until after we have queried
+ // *all* the costs: a temporary expansion performed in between, one that
+ // we might not intend to keep, may affect the cost calculation of the
+ // next SCEVs we query, and those SCEVs may erroneously get a smaller
+ // cost.
+
+ // Collect all the candidate PHINodes to be rewritten.
+ RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost);
+ }
+ }
+ }
+
+ // Now that we've done preliminary filtering and billed all the SCEVs,
+ // we can perform the last sanity check - the expansion must be valid.
+ for (RewritePhi &Phi : RewritePhiSet) {
+ Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
+ Phi.ExpansionPoint);
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
+ << *(Phi.Expansion) << '\n'
+ << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
+
+ // FIXME: isValidRewrite() is a hack. It should be an assert, eventually.
+ Phi.ValidRewrite = isValidRewrite(SE, Phi.ExpansionPoint, Phi.Expansion);
+ if (!Phi.ValidRewrite) {
+ DeadInsts.push_back(Phi.Expansion);
+ continue;
+ }
+
+#ifndef NDEBUG
+ // If we reuse an instruction from a loop which is neither L nor one of
+ // its containing loops, we end up breaking LCSSA form for this loop by
+ // creating a new use of its instruction.
+ if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
+ if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+ if (EVL != L)
+ assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+ }
+
+ // TODO: after isValidRewrite() is an assertion, evaluate whether
+ // it is beneficial to change how we calculate high-cost:
+ // if we have SCEV 'A' which we know we will expand, should we calculate
+ // the cost of other SCEV's after expanding SCEV 'A',
+ // thus potentially giving cost bonus to those other SCEV's?
+
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ int NumReplaced = 0;
+
+ // Transformation.
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ if (!Phi.ValidRewrite)
+ continue;
+
+ PHINode *PN = Phi.PN;
+ Value *ExitVal = Phi.Expansion;
+
+ // Only do the rewrite when the ExitValue can be expanded cheaply.
+ // If LoopCanBeDel is true, rewrite exit value aggressively.
+ if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+
+ NumReplaced++;
+ Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
+ PN->setIncomingValue(Phi.Ith, ExitVal);
+
+ // If this instruction is dead now, delete it. Don't do it now to avoid
+ // invalidating iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
+
+ // Replace PN with ExitVal if that is legal and does not break LCSSA.
+ if (PN->getNumIncomingValues() == 1 &&
+ LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
+ PN->replaceAllUsesWith(ExitVal);
+ PN->eraseFromParent();
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+ return NumReplaced;
+}
+
+/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
+/// \p OrigLoop.
+void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
+ Loop *RemainderLoop, uint64_t UF) {
+ assert(UF > 0 && "Zero unroll factor is not supported");
+ assert(UnrolledLoop != RemainderLoop &&
+ "Unrolled and Remainder loops are expected to be distinct");
+
+ // Get number of iterations in the original scalar loop.
+ unsigned OrigLoopInvocationWeight = 0;
+ Optional<unsigned> OrigAverageTripCount =
+ getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
+ if (!OrigAverageTripCount)
+ return;
+
+ // Calculate number of iterations in unrolled loop.
+ unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
+ // Calculate number of iterations for remainder loop.
+ unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
+
+ setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
+ OrigLoopInvocationWeight);
+ setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
+ OrigLoopInvocationWeight);
+}
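For example, with an original estimated trip count of 10 and UF = 4, the unrolled loop is assigned an estimated trip count of 10 / 4 = 2 and the remainder loop 10 % 4 = 2; both keep OrigLoopInvocationWeight, so the estimated total work is preserved up to the rounding inherent in the integer division.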
+
+/// Utility that implements appending of loops onto a worklist.
+/// Loops are added in preorder (analogous to reverse postorder for trees),
+/// and the worklist is processed LIFO.
+template <typename RangeT>
+void llvm::appendReversedLoopsToWorklist(
+ RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ // We use an internal worklist to build up the preorder traversal without
+ // recursion.
+ SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
+
+ // We walk the initial sequence of loops in reverse because we generally want
+ // to visit defs before uses and the worklist is LIFO.
+ for (Loop *RootL : Loops) {
+ assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ Loop *L = PreOrderWorklist.pop_back_val();
+ PreOrderWorklist.append(L->begin(), L->end());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+
+ Worklist.insert(std::move(PreOrderLoops));
+ PreOrderLoops.clear();
+ }
+}
+
+template <typename RangeT>
+void llvm::appendLoopsToWorklist(RangeT &&Loops,
+ SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ appendReversedLoopsToWorklist(reverse(Loops), Worklist);
+}
+
+template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
+ ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);
+
+template void
+llvm::appendLoopsToWorklist<Loop &>(Loop &L,
+ SmallPriorityWorklist<Loop *, 4> &Worklist);
+
+void llvm::appendLoopsToWorklist(LoopInfo &LI,
+ SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ appendReversedLoopsToWorklist(LI, Worklist);
+}
+
+Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop &New = *LI->AllocateLoop();
+ if (PL)
+ PL->addChildLoop(&New);
+ else
+ LI->addTopLevelLoop(&New);
+
+ if (LPM)
+ LPM->addLoop(New);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+
+ // Add all of the subloops to the new loop.
+ for (Loop *I : *L)
+ cloneLoop(I, &New, VM, LI, LPM);
+
+ return &New;
+}
+
+/// IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
+
+/// Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
+ Loop *TheLoop, Instruction *Loc,
+ SCEVExpander &Exp, ScalarEvolution *SE) {
+ // TODO: Add helper to retrieve pointers to CG.
+ Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
+ << *Ptr << "\n");
+ // Ptr could be in the loop body. If so, expand a new one at the correct
+ // location.
+ Instruction *Inst = dyn_cast<Instruction>(Ptr);
+ Value *NewPtr = (Inst && TheLoop->contains(Inst))
+ ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
+ : Ptr;
+ // We must return a half-open range, which means incrementing Sc.
+ const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
+ Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
+ return {NewPtr, NewPtrPlusOne};
+ } else {
+ Value *Start = nullptr, *End = nullptr;
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
+ << "\n");
+ return {Start, End};
+ }
+}
+
+/// Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
+expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
+ Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ transform(PointerChecks, std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerCheck &Check) {
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
+ Second =
+ expandBounds(Check.second, L, Loc, Exp, SE);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
+
+std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
+ Instruction *Loc, Loop *TheLoop,
+ const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
+ ScalarEvolution *SE) {
+ // TODO: Move noalias annotation code from LoopVersioning here and share
+ // with LV if possible.
+ // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately,
+ // if possible.
+ const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *FirstInst = nullptr;
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+
+ // FIXME: this helper is currently a duplicate of the one in
+ // LoopVectorize.cpp.
+ auto GetFirstInst = [](Instruction *FirstInst, Value *V,
+ Instruction *Loc) -> Instruction * {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
+ };
+
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ // [A|B].Start points to the first accessed byte under base [A|B].
+ // [A|B].End points to the last accessed byte, plus one.
+ // There is no conflict when the intervals are disjoint:
+ // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
+ //
+ // bound0 = (B.Start < A.End)
+ // bound1 = (A.Start < B.End)
+ // IsConflict = bound0 & bound1
+ Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
+ FirstInst = GetFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
+ FirstInst = GetFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
+ FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+ // constant expression, in which case there is no Instruction anchored in
+ // the block.
+ Instruction *Check =
+ BinaryOperator::CreateAnd(MemoryRuntimeCheck, ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = GetFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
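The conflict predicate assembled above is the classic half-open interval overlap test. As an illustration (ordinary pointers stand in for the expanded bounds; the function name is made up for the sketch):

#include <cassert>

// Two half-open byte ranges [Start, End) conflict iff each one starts
// before the other ends -- exactly the bound0 & bound1 comparison above.
static bool conflict(const char *AStart, const char *AEnd,
                     const char *BStart, const char *BEnd) {
  return AStart < BEnd && BStart < AEnd;
}

int main() {
  char Buf[16];
  assert(conflict(Buf, Buf + 8, Buf + 4, Buf + 12));  // overlapping ranges
  assert(!conflict(Buf, Buf + 8, Buf + 8, Buf + 16)); // disjoint, touching
  return 0;
}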
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 50752bd78a650..16bd08c704eeb 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -13,15 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
@@ -44,9 +45,8 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
}
}
-void LoopVersioning::setAliasChecks(
- SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
- AliasChecks = std::move(Checks);
+void LoopVersioning::setAliasChecks(ArrayRef<RuntimePointerCheck> Checks) {
+ AliasChecks = {Checks.begin(), Checks.end()};
}
void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
@@ -62,8 +62,10 @@ void LoopVersioning::versionLoop(
// Add the memcheck in the original preheader (this is empty initially).
BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
+ const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
+ addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
+ AliasChecks, RtPtrChecking.getSE());
const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
@@ -194,8 +196,7 @@ void LoopVersioning::prepareNoAliasMetadata() {
// Go through the checks and for each pointer group, collect the scopes for
// each non-aliasing pointer group.
- DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
- SmallVector<Metadata *, 4>>
+ DenseMap<const RuntimeCheckingPtrGroup *, SmallVector<Metadata *, 4>>
GroupToNonAliasingScopes;
for (const auto &Check : AliasChecks)
diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 1af0ce3d86cc1..0b225e8abc4e7 100644
--- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -53,7 +53,7 @@ static bool runImpl(Function &F) {
II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
CallInst *NewCall =
- CallInst::Create(II->getFunctionType(), II->getCalledValue(),
+ CallInst::Create(II->getFunctionType(), II->getCalledOperand(),
CallArgs, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 0cc085dc366c6..616b4e8eb01c9 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -14,17 +14,9 @@
using namespace llvm;
-static unsigned getLoopOperandSizeInBytes(Type *Type) {
- if (VectorType *VTy = dyn_cast<VectorType>(Type)) {
- return VTy->getBitWidth() / 8;
- }
-
- return Type->getPrimitiveSizeInBits() / 8;
-}
-
void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, ConstantInt *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DstAlign,
bool SrcIsVolatile, bool DstIsVolatile,
const TargetTransformInfo &TTI) {
// No need to expand zero length copies.
@@ -35,17 +27,18 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
BasicBlock *PostLoopBB = nullptr;
Function *ParentFunc = PreLoopBB->getParent();
LLVMContext &Ctx = PreLoopBB->getContext();
+ const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *TypeOfCopyLen = CopyLen->getType();
- Type *LoopOpType =
- TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
- unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+ unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
if (LoopEndCount != 0) {
// Split
PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
@@ -66,16 +59,20 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
}
+ Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
+ Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
+
IRBuilder<> LoopBuilder(LoopBB);
PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
// Loop Body
Value *SrcGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
Value *DstGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+ LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
@@ -93,17 +90,17 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
: InsertBefore);
- // Update the alignment based on the copy size used in the loop body.
- SrcAlign = std::min(SrcAlign, LoopOpSize);
- DestAlign = std::min(DestAlign, LoopOpSize);
-
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAlign, DestAlign);
+ SrcAS, DstAS, SrcAlign.value(),
+ DstAlign.value());
for (auto OpTy : RemainingOps) {
+ Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
+ Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));
+
// Calculate the new index
- unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
+ unsigned OperandSize = DL.getTypeStoreSize(OpTy);
uint64_t GepIndex = BytesCopied / OperandSize;
assert(GepIndex * OperandSize == BytesCopied &&
"Division should have no Remainder!");
@@ -114,7 +111,8 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
+ Value *Load =
+ RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
// Cast destination to operand type and store.
PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
@@ -123,7 +121,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(DstAddr, DstPtrType);
Value *DstGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
- RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+ RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
BytesCopied += OperandSize;
}
@@ -134,8 +132,8 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *SrcAddr, Value *DstAddr,
- Value *CopyLen, unsigned SrcAlign,
- unsigned DestAlign, bool SrcIsVolatile,
+ Value *CopyLen, Align SrcAlign,
+ Align DstAlign, bool SrcIsVolatile,
bool DstIsVolatile,
const TargetTransformInfo &TTI) {
BasicBlock *PreLoopBB = InsertBefore->getParent();
@@ -143,16 +141,17 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
Function *ParentFunc = PreLoopBB->getParent();
+ const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
LLVMContext &Ctx = PreLoopBB->getContext();
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- Type *LoopOpType =
- TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
- unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
if (SrcAddr->getType() != SrcOpType) {
@@ -177,13 +176,17 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
IRBuilder<> LoopBuilder(LoopBB);
+ Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
+ Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
+
PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign,
+ SrcIsVolatile);
Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+ LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
@@ -234,10 +237,11 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
Value *SrcGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
+ Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign,
+ SrcIsVolatile);
Value *DstGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
- ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+ ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
Value *ResNewIndex =
ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
@@ -284,13 +288,14 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
// }
// return dst;
// }
-static void createMemMoveLoop(Instruction *InsertBefore,
- Value *SrcAddr, Value *DstAddr, Value *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile) {
+static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, Align SrcAlign,
+ Align DstAlign, bool SrcIsVolatile,
+ bool DstIsVolatile) {
Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
@@ -318,6 +323,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
BasicBlock *ExitBB = InsertBefore->getParent();
ExitBB->setName("memmove_done");
+ unsigned PartSize = DL.getTypeStoreSize(EltTy);
+ Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
+ Align PartDstAlign(commonAlignment(DstAlign, PartSize));
+
// Initial comparison of n == 0 that lets us skip the loops altogether. Shared
// between both backwards and forward copy clauses.
ICmpInst *CompareN =
@@ -331,11 +340,12 @@ static void createMemMoveLoop(Instruction *InsertBefore,
PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
Value *IndexPtr = LoopBuilder.CreateSub(
LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
- Value *Element = LoopBuilder.CreateLoad(
+ Value *Element = LoopBuilder.CreateAlignedLoad(
EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
- "element");
- LoopBuilder.CreateStore(
- Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
+ PartSrcAlign, "element");
+ LoopBuilder.CreateAlignedStore(
+ Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
+ PartDstAlign);
LoopBuilder.CreateCondBr(
LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
ExitBB, LoopBB);
@@ -349,11 +359,11 @@ static void createMemMoveLoop(Instruction *InsertBefore,
BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
IRBuilder<> FwdLoopBuilder(FwdLoopBB);
PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
- Value *FwdElement = FwdLoopBuilder.CreateLoad(
- EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
- "element");
- FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
+ Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
+ Value *FwdElement =
+ FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
+ Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
+ FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
@@ -365,12 +375,13 @@ static void createMemMoveLoop(Instruction *InsertBefore,
ElseTerm->eraseFromParent();
}
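The control flow built above matches the reference C loop quoted in the comment before this function: the copy direction is chosen from the relative order of the two pointers so overlapping ranges are copied safely. A compact scalar restatement, as a sketch only (the element type is fixed to char, whereas the IR version copies EltTy units):

#include <cassert>
#include <cstddef>

static void memmoveLike(char *Dst, const char *Src, size_t N) {
  if (N == 0) // the shared "n == 0" early exit
    return;
  if (Src < Dst) // backward copy clause
    for (size_t I = N; I != 0; --I)
      Dst[I - 1] = Src[I - 1];
  else // forward copy clause
    for (size_t I = 0; I != N; ++I)
      Dst[I] = Src[I];
}

int main() {
  char Buf[] = "abcdef";
  memmoveLike(Buf + 1, Buf, 5); // overlapping; needs the backward copy
  assert(Buf[1] == 'a' && Buf[5] == 'e');
  return 0;
}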
-static void createMemSetLoop(Instruction *InsertBefore,
- Value *DstAddr, Value *CopyLen, Value *SetValue,
- unsigned Align, bool IsVolatile) {
+static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
+ Value *CopyLen, Value *SetValue, Align DstAlign,
+ bool IsVolatile) {
Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
BasicBlock *NewBB =
OrigBB->splitBasicBlock(InsertBefore, "split");
BasicBlock *LoopBB
@@ -388,14 +399,17 @@ static void createMemSetLoop(Instruction *InsertBefore,
LoopBB);
OrigBB->getTerminator()->eraseFromParent();
+ unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
+ Align PartAlign(commonAlignment(DstAlign, PartSize));
+
IRBuilder<> LoopBuilder(LoopBB);
PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
- LoopBuilder.CreateStore(
+ LoopBuilder.CreateAlignedStore(
SetValue,
LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
- IsVolatile);
+ PartAlign, IsVolatile);
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
@@ -408,25 +422,27 @@ static void createMemSetLoop(Instruction *InsertBefore,
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
const TargetTransformInfo &TTI) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
- createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ CI,
- /* SrcAlign */ Memcpy->getSourceAlignment(),
- /* DestAlign */ Memcpy->getDestAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransformInfo */ TTI);
+ createMemCpyLoopKnownSize(
+ /* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
} else {
- createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getSourceAlignment(),
- /* DestAlign */ Memcpy->getDestAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
+ createMemCpyLoopUnknownSize(
+ /* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
}
}
@@ -435,8 +451,8 @@ void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
/* SrcAddr */ Memmove->getRawSource(),
/* DstAddr */ Memmove->getRawDest(),
/* CopyLen */ Memmove->getLength(),
- /* SrcAlign */ Memmove->getSourceAlignment(),
- /* DestAlign */ Memmove->getDestAlignment(),
+ /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memmove->isVolatile(),
/* DstIsVolatile */ Memmove->isVolatile());
}
@@ -446,6 +462,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
/* DstAddr */ Memset->getRawDest(),
/* CopyLen */ Memset->getLength(),
/* SetValue */ Memset->getValue(),
- /* Alignment */ Memset->getDestAlignment(),
+ /* Alignment */ Memset->getDestAlign().valueOrOne(),
Memset->isVolatile());
}
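Taken together, the expansions in this file share one shape: copy the bulk in wide LoopOpType units, then finish the trailing bytes with narrower residual operations. A scalar sketch of the known-size case (the 4-byte loop op and the byte-wise residual are assumptions for illustration; the real lowering asks TTI for both types):

#include <cassert>
#include <cstdint>
#include <cstring>

static void memcpyLike(uint8_t *Dst, const uint8_t *Src, uint64_t Len) {
  const uint64_t OpSize = 4;         // stand-in for LoopOpSize
  uint64_t LoopCount = Len / OpSize; // LoopEndCount in the code above
  for (uint64_t I = 0; I != LoopCount; ++I) // main loop, one wide op each
    std::memcpy(Dst + I * OpSize, Src + I * OpSize, OpSize);
  for (uint64_t I = LoopCount * OpSize; I != Len; ++I) // residual bytes
    Dst[I] = Src[I];
}

int main() {
  uint8_t Src[11], Dst[11] = {0};
  for (int I = 0; I < 11; ++I)
    Src[I] = uint8_t(I);
  memcpyLike(Dst, Src, 11); // two wide copies plus three residual bytes
  assert(std::memcmp(Dst, Src, 11) == 0);
  return 0;
}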
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 4b9d0dadfc173..34e836d9660f3 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -148,13 +148,6 @@ bool LowerSwitch::runOnFunction(Function &F) {
LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
- // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not
- // preserve it and it becomes stale (when available) pretty much immediately.
- // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI
- // and computeKnownBits to refine isValidAssumeForContext's results. Given
- // that the latter can handle some of the simple cases w/o a DominatorTree,
- // it's easier to refrain from using the tree than to keep it up to date.
- LVI->disableDT();
bool Changed = false;
SmallPtrSet<BasicBlock*, 8> DeleteList;
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index b94f57e4dc2ca..ef9f18a2289e9 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,15 +11,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
+#define DEBUG_TYPE "moduleutils"
+
static void appendToGlobalArray(const char *Array, Module &M, Function *F,
int Priority, Constant *Data) {
IRBuilder<> IRB(M.getContext());
@@ -117,6 +119,15 @@ llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
AttributeList());
}
+Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
+ Function *Ctor = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::InternalLinkage, CtorName, &M);
+ BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
+ ReturnInst::Create(M.getContext(), CtorBB);
+ return Ctor;
+}
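A typical use of the new helper, sketched as a standalone program (the ctor name and priority are illustrative; appendToGlobalCtors is the existing ModuleUtils API for registering a function in llvm.global_ctors):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Create an empty internal ctor (a body that just does 'ret void') and
  // register it at the usual sanitizer priority.
  Function *Ctor = createSanitizerCtor(M, "demo.module_ctor");
  appendToGlobalCtors(M, Ctor, /*Priority=*/65535);
  M.print(outs(), nullptr);
  return 0;
}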
+
std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
@@ -126,11 +137,8 @@ std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
"Sanitizer's init function expects different number of arguments");
FunctionCallee InitFunction =
declareSanitizerInitFunction(M, InitName, InitArgTypes);
- Function *Ctor = Function::Create(
- FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalValue::InternalLinkage, CtorName, &M);
- BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
- IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
+ Function *Ctor = createSanitizerCtor(M, CtorName);
+ IRBuilder<> IRB(Ctor->getEntryBlock().getTerminator());
IRB.CreateCall(InitFunction, InitArgs);
if (!VersionCheckName.empty()) {
FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
@@ -298,8 +306,9 @@ void VFABI::setVectorVariantNames(
Module *M = CI->getModule();
#ifndef NDEBUG
for (const std::string &VariantMapping : VariantMappings) {
- Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping);
- assert(VI.hasValue() && "Canno add an invalid VFABI name.");
+ LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
+ Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
+ assert(VI.hasValue() && "Cannot add an invalid VFABI name.");
assert(M->getNamedValue(VI.getValue().VectorName) &&
"Cannot add variant to attribute: "
"vector function declaration is missing.");
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index 1c5c41abc6823..7083789267d9c 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -55,7 +55,7 @@ public:
Hasher.final(Hash);
SmallString<32> Result;
MD5::stringifyResult(Hash, Result);
- TheHash = Result.str();
+ TheHash = std::string(Result.str());
return TheHash;
}
};
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index dda2867f44b24..99b64a7462f62 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
@@ -39,7 +40,6 @@
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
using namespace PatternMatch;
-using namespace llvm::PredicateInfoClasses;
INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
"PredicateInfo Printer", false, false)
@@ -83,7 +83,6 @@ getBlockEdge(const PredicateBase *PB) {
}
namespace llvm {
-namespace PredicateInfoClasses {
enum LocalNum {
// Operations that must appear first in the block.
LN_First,
@@ -109,8 +108,7 @@ struct ValueDFS {
};
// Perform a strict weak ordering on instructions and arguments.
-static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
- const Value *B) {
+static bool valueComesBefore(const Value *A, const Value *B) {
auto *ArgA = dyn_cast_or_null<Argument>(A);
auto *ArgB = dyn_cast_or_null<Argument>(B);
if (ArgA && !ArgB)
@@ -119,17 +117,14 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
return false;
if (ArgA && ArgB)
return ArgA->getArgNo() < ArgB->getArgNo();
- return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B));
+ return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
}
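Instruction::comesBefore, used here in place of the old OrderedInstructions helper, relies on lazily maintained per-block instruction numbering and requires both instructions to share a basic block; that holds at this call site because the comparator only falls back to local ordering when the two values' DFS numbers tie, i.e. within a single block (arguments having been handled above).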
-// This compares ValueDFS structures, creating OrderedBasicBlocks where
-// necessary to compare uses/defs in the same block. Doing so allows us to walk
-// the minimum number of instructions necessary to compute our def/use ordering.
+// This compares ValueDFS structures. Doing so allows us to walk the minimum
+// number of instructions necessary to compute our def/use ordering.
struct ValueDFS_Compare {
DominatorTree &DT;
- OrderedInstructions &OI;
- ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI)
- : DT(DT), OI(OI) {}
+ ValueDFS_Compare(DominatorTree &DT) : DT(DT) {}
bool operator()(const ValueDFS &A, const ValueDFS &B) const {
if (&A == &B)
@@ -210,14 +205,14 @@ struct ValueDFS_Compare {
// numbering will say the placed predicateinfos should go first (IE
// LN_beginning), so we won't be in this function. For assumes, we will end
// up here, because we need to order the def we will place relative to the
- // assume. So for the purpose of ordering, we pretend the def is the assume
- // because that is where we will insert the info.
+ // assume. So for the purpose of ordering, we pretend the def is right
+ // after the assume, because that is where we will insert the info.
if (!VD.U) {
assert(VD.PInfo &&
"No def, no use, and no predicateinfo should not occur");
assert(isa<PredicateAssume>(VD.PInfo) &&
"Middle of block should only occur for assumes");
- return cast<PredicateAssume>(VD.PInfo)->AssumeInst;
+ return cast<PredicateAssume>(VD.PInfo)->AssumeInst->getNextNode();
}
return nullptr;
}
@@ -243,18 +238,71 @@ struct ValueDFS_Compare {
auto *ArgB = dyn_cast_or_null<Argument>(BDef);
if (ArgA || ArgB)
- return valueComesBefore(OI, ArgA, ArgB);
+ return valueComesBefore(ArgA, ArgB);
auto *AInst = getDefOrUser(ADef, A.U);
auto *BInst = getDefOrUser(BDef, B.U);
- return valueComesBefore(OI, AInst, BInst);
+ return valueComesBefore(AInst, BInst);
}
};
-} // namespace PredicateInfoClasses
+class PredicateInfoBuilder {
+ // Used to store information about each value we might rename.
+ struct ValueInfo {
+ SmallVector<PredicateBase *, 4> Infos;
+ };
+
+ PredicateInfo &PI;
+ Function &F;
+ DominatorTree &DT;
+ AssumptionCache &AC;
+
+ // This stores info about each operand or comparison result we make copies
+ // of. The real ValueInfos start at index 1, index 0 is unused so that we
+ // can more easily detect invalid indexing.
+ SmallVector<ValueInfo, 32> ValueInfos;
+
+ // This gives the index into the ValueInfos array for a given Value. Because
+ // 0 is not a valid Value Info index, you can use DenseMap::lookup and tell
+ // whether it returned a valid result.
+ DenseMap<Value *, unsigned int> ValueInfoNums;
+
+ // The set of edges along which we can only handle phi uses, due to critical
+ // edges.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeUsesOnly;
+
+ ValueInfo &getOrCreateValueInfo(Value *);
+ const ValueInfo &getValueInfo(Value *) const;
+
+ void processAssume(IntrinsicInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void processBranch(BranchInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void processSwitch(SwitchInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void renameUses(SmallVectorImpl<Value *> &OpsToRename);
+ void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
+ PredicateBase *PB);
+
+ typedef SmallVectorImpl<ValueDFS> ValueDFSStack;
+ void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &);
+ Value *materializeStack(unsigned int &, ValueDFSStack &, Value *);
+ bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const;
+ void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &);
+
+public:
+ PredicateInfoBuilder(PredicateInfo &PI, Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : PI(PI), F(F), DT(DT), AC(AC) {
+ // Push an empty operand info so that we can detect 0 as not finding one
+ ValueInfos.resize(1);
+ }
+
+ void buildPredicateInfo();
+};
-bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
- const ValueDFS &VDUse) const {
+bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack,
+ const ValueDFS &VDUse) const {
if (Stack.empty())
return false;
// If it's a phi only use, make sure it's for this phi node edge, and that the
@@ -281,15 +329,15 @@ bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
VDUse.DFSOut <= Stack.back().DFSOut);
}
-void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
- const ValueDFS &VD) {
+void PredicateInfoBuilder::popStackUntilDFSScope(ValueDFSStack &Stack,
+ const ValueDFS &VD) {
while (!Stack.empty() && !stackIsInScope(Stack, VD))
Stack.pop_back();
}
// Convert the uses of Op into a vector of uses, associating global and local
// DFS info with each one.
-void PredicateInfo::convertUsesToDFSOrdered(
+void PredicateInfoBuilder::convertUsesToDFSOrdered(
Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
for (auto &U : Op->uses()) {
if (auto *I = dyn_cast<Instruction>(U.getUser())) {
@@ -338,19 +386,20 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
}
// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
-void PredicateInfo::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
- PredicateBase *PB) {
+void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename,
+ Value *Op, PredicateBase *PB) {
auto &OperandInfo = getOrCreateValueInfo(Op);
if (OperandInfo.Infos.empty())
OpsToRename.push_back(Op);
- AllInfos.push_back(PB);
+ PI.AllInfos.push_back(PB);
OperandInfo.Infos.push_back(PB);
}
// Process an assume instruction and place relevant operations we want to rename
// into OpsToRename.
-void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
- SmallVectorImpl<Value *> &OpsToRename) {
+void PredicateInfoBuilder::processAssume(
+ IntrinsicInst *II, BasicBlock *AssumeBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
// See if we have a comparison we support
SmallVector<Value *, 8> CmpOperands;
SmallVector<Value *, 2> ConditionsToProcess;
@@ -389,8 +438,9 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
// Process a block terminating branch, and place relevant operations to be
// renamed into OpsToRename.
-void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
- SmallVectorImpl<Value *> &OpsToRename) {
+void PredicateInfoBuilder::processBranch(
+ BranchInst *BI, BasicBlock *BranchBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
BasicBlock *FirstBB = BI->getSuccessor(0);
BasicBlock *SecondBB = BI->getSuccessor(1);
SmallVector<BasicBlock *, 2> SuccsToProcess;
@@ -459,8 +509,9 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
}
// Process a block terminating switch, and place relevant operations to be
// renamed into OpsToRename.
-void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
- SmallVectorImpl<Value *> &OpsToRename) {
+void PredicateInfoBuilder::processSwitch(
+ SwitchInst *SI, BasicBlock *BranchBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
Value *Op = SI->getCondition();
if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
return;
@@ -486,7 +537,7 @@ void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
}
// Build predicate info for our function
-void PredicateInfo::buildPredicateInfo() {
+void PredicateInfoBuilder::buildPredicateInfo() {
DT.updateDFSNumbers();
// Collect operands to rename from all conditional branch terminators, as well
// as assume statements.
@@ -530,9 +581,9 @@ static Function *getCopyDeclaration(Module *M, Type *Ty) {
// Given the renaming stack, make all the operands currently on the stack real
// by inserting them into the IR. Return the last operation's value.
-Value *PredicateInfo::materializeStack(unsigned int &Counter,
- ValueDFSStack &RenameStack,
- Value *OrigOp) {
+Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
+ ValueDFSStack &RenameStack,
+ Value *OrigOp) {
// Find the first thing we have to materialize
auto RevIter = RenameStack.rbegin();
for (; RevIter != RenameStack.rend(); ++RevIter)
@@ -549,6 +600,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
ValueDFS &Result = *RenameIter;
auto *ValInfo = Result.PInfo;
+ ValInfo->RenamedOp = (RenameStack.end() - Start) == RenameStack.begin()
+ ? OrigOp
+ : (RenameStack.end() - Start - 1)->Def;
// For edge predicates, we can just place the operand in the block before
// the terminator. For assume, we have to place it right before the assume
// to ensure we dominate all of our uses. Always insert right before the
@@ -558,21 +612,23 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
if (IF->users().empty())
- CreatedDeclarations.insert(IF);
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
- PredicateMap.insert({PIC, ValInfo});
+ PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
} else {
auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
assert(PAssume &&
"Should not have gotten here without it being an assume");
- IRBuilder<> B(PAssume->AssumeInst);
+ // Insert the predicate directly after the assume. While it also holds
+ // directly before it, assume(i1 true) is not a useful fact.
+ IRBuilder<> B(PAssume->AssumeInst->getNextNode());
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
if (IF->users().empty())
- CreatedDeclarations.insert(IF);
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
- PredicateMap.insert({PIC, ValInfo});
+ PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
}
}
@@ -598,8 +654,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
- ValueDFS_Compare Compare(DT, OI);
+void PredicateInfoBuilder::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(DT);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
@@ -719,7 +775,8 @@ void PredicateInfo::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
}
}
-PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
+PredicateInfoBuilder::ValueInfo &
+PredicateInfoBuilder::getOrCreateValueInfo(Value *Operand) {
auto OIN = ValueInfoNums.find(Operand);
if (OIN == ValueInfoNums.end()) {
// This will grow it
@@ -732,8 +789,8 @@ PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
return ValueInfos[OIN->second];
}
-const PredicateInfo::ValueInfo &
-PredicateInfo::getValueInfo(Value *Operand) const {
+const PredicateInfoBuilder::ValueInfo &
+PredicateInfoBuilder::getValueInfo(Value *Operand) const {
auto OINI = ValueInfoNums.lookup(Operand);
assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
assert(OINI < ValueInfos.size() &&
@@ -743,10 +800,9 @@ PredicateInfo::getValueInfo(Value *Operand) const {
PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
AssumptionCache &AC)
- : F(F), DT(DT), AC(AC), OI(&DT) {
- // Push an empty operand info so that we can detect 0 as not finding one
- ValueInfos.resize(1);
- buildPredicateInfo();
+ : F(F) {
+ PredicateInfoBuilder Builder(*this, F, DT, AC);
+ Builder.buildPredicateInfo();
}
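
The rewritten constructor is an instance of a common refactoring: scratch state needed only while building the analysis (ValueInfos, ValueInfoNums, EdgeUsesOnly) moves into a short-lived builder, so the persistent PredicateInfo object keeps only its results. A generic sketch of the shape, with illustrative names:

    #include <vector>
    struct Analysis {
      std::vector<int> Results; // persistent output only
    };
    struct AnalysisBuilder {
      Analysis &A;
      std::vector<int> Scratch; // construction-only state, dies with builder
      explicit AnalysisBuilder(Analysis &A) : A(A) {}
      void build() { A.Results.assign(Scratch.begin(), Scratch.end()); }
    };
    Analysis makeAnalysis() {
      Analysis A;
      AnalysisBuilder(A).build(); // scratch memory is reclaimed here
      return A;
    }
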
// Remove all declarations we created. The PredicateInfo consumers are
@@ -829,11 +885,11 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
public:
PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {}
+ void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) override {}
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) {
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override {
if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
OS << "; Has predicate info\n";
if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
@@ -842,18 +898,21 @@ public:
PB->From->printAsOperand(OS);
OS << ",";
PB->To->printAsOperand(OS);
- OS << "] }\n";
+ OS << "]";
} else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
<< " Switch:" << *PS->Switch << " Edge: [";
PS->From->printAsOperand(OS);
OS << ",";
PS->To->printAsOperand(OS);
- OS << "] }\n";
+ OS << "]";
} else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
OS << "; assume predicate info {"
- << " Comparison:" << *PA->Condition << " }\n";
+ << " Comparison:" << *PA->Condition;
}
+ OS << ", RenamedOp: ";
+ PI->RenamedOp->printAsOperand(OS, false);
+ OS << " }\n";
}
}
};
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index d58e1ea574ef8..c7e9c919ec471 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -595,11 +595,6 @@ void PromoteMem2Reg::run() {
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
- // At this point, we're committed to promoting the alloca using IDF's, and
- // the standard SSA construction algorithm. Determine which blocks need PHI
- // nodes and see if we can optimize out some work by avoiding insertion of
- // dead phi nodes.
-
// Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
Info.DefiningBlocks.end());
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index bffdd115d940c..57df2334c750f 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -56,7 +56,7 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
else
getAvailableVals(AV).clear();
ProtoType = Ty;
- ProtoName = Name;
+ ProtoName = std::string(Name);
}
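
Like the TheHash change earlier in this commit, this hunk reflects llvm::StringRef's implicit conversion to std::string being removed; the copy must now be spelled out. A minimal sketch:

    #include "llvm/ADT/StringRef.h"
    #include <string>
    void copyName(llvm::StringRef Name) {
      std::string A = std::string(Name); // explicit conversion, as above
      std::string B = Name.str();        // equivalent spelling
    }
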
bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
@@ -195,11 +195,6 @@ void SSAUpdater::RewriteUse(Use &U) {
else
V = GetValueInMiddleOfBlock(User->getParent());
- // Notify that users of the existing value that it is being replaced.
- Value *OldVal = U.get();
- if (OldVal != V && OldVal->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(OldVal, V);
-
U.set(V);
}
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
new file mode 100644
index 0000000000000..71b48482f26aa
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -0,0 +1,2569 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
+ "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
+ cl::desc("When performing SCEV expansion only if it is cheap to do, this "
+ "controls the budget that is considered cheap (default = 4)"));
+
+using namespace PatternMatch;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Instruction *Ret = nullptr;
+
+ // Check to see if there is already a cast!
+ for (User *U : V->users())
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, create a new cast at IP.
+ // Likewise, do not reuse a cast at BIP because it must dominate
+ // instructions that might be inserted before BIP.
+ if (BasicBlock::iterator(CI) != IP || BIP == IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point.
+ Ret = CastInst::Create(Op, V, Ty, "", &*IP);
+ Ret->takeName(CI);
+ CI->replaceAllUsesWith(Ret);
+ break;
+ }
+ Ret = CI;
+ break;
+ }
+
+ // Create a new cast.
+ if (!Ret)
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(SE.DT.dominates(Ret, &*BIP));
+
+ rememberInstruction(Ret);
+ return Ret;
+}
+
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+ BasicBlock *MustDominate) {
+ BasicBlock::iterator IP = ++I->getIterator();
+ if (auto *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+
+ while (isa<PHINode>(IP))
+ ++IP;
+
+ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+ ++IP;
+ } else if (isa<CatchSwitchInst>(IP)) {
+ IP = MustDominate->getFirstInsertionPt();
+ } else {
+ assert(!IP->isEHPad() && "unexpected eh pad!");
+ }
+
+ return IP;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation, and hoisting
+/// to an outer loop when the opportunity is there and it is safe.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+
+ auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
+ // Ensure that no-wrap flags match.
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
+ return true;
+ if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
+ return true;
+ }
+ // Conservatively, do not use any instruction which has any of exact
+ // flags installed.
+ if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ return false;
+ };
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ if (IsSafeToHoist) {
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+ }
+
+ // If we haven't found this binop, insert it.
+ Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
+ BO->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BO->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BO->setHasNoSignedWrap();
+ rememberInstruction(BO);
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder = SE.getAddExpr(
+ Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // Size is known, check if there is a constant operand which is a multiple
+ // of the given factor. If so, we can factor it.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getAPInt().srem(FC->getAPInt())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
+ return false;
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW));
+ return true;
+ }
+
+ return false;
+}
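
A concrete instance of the constant case above, with assumed values: factoring C = 22 by Factor = 8 under signed division leaves quotient 2 and remainder 6, so S becomes 2 and 6 is folded into Remainder.

    #include "llvm/ADT/APInt.h"
    void factorOutExample() {
      llvm::APInt C(64, 22), FC(64, 8);
      llvm::APInt Quot = C.sdiv(FC); // 2: the value S is replaced with
      llvm::APInt Rem = C.srem(FC);  // 6: accumulated into Remainder
      (void)Quot;
      (void)Rem;
    }
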
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW)));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ PointerType *PTy,
+ Type *Ty,
+ Value *V) {
+ Type *OriginalElTy = PTy->getElementType();
+ Type *ElTy = OriginalElTy;
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ Type *IntIdxTy = DL.getIndexType(PTy);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (const SCEV *Op : Ops) {
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Op);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ // FIXME: Handle VectorType.
+      // E.g., if ElTy is a scalable vector, then ElSize is not a
+      // compile-time constant and therefore cannot be factored out. The
+      // generated IR is less ideal: base 'V' is cast to i8* and an ugly
+      // getelementptr is emitted over that.
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ assert(!isa<Instruction>(V) ||
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ return GEP;
+ }
+
+ {
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = any_of(
+ GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+ }
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
+ Value *V) {
+ const SCEV *const Ops[1] = {Op};
+ return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return nullptr;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI.getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return nullptr;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = nullptr;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (const SCEV *Op : N->operands())
+ L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result = PickMostRelevantLoop(
+ getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
+ return false;
+ } else if (RHS.second->isNonConstantNegative())
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = nullptr;
+ for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+        // If the operand is a SCEVUnknown and not an instruction, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (Op->isNonConstantNegative()) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
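
One of the cases above in scalar form: when an operand is a non-constant negative, the expander emits a subtract rather than materializing the negation and adding it. A trivial sketch with plain integers (hypothetical values):

    int viaNegateAndAdd(int Sum, int X) { return Sum + (-1 * X); } // avoided
    int viaSubtract(int Sum, int X) { return Sum - X; } // what gets emitted
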
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = nullptr;
+ auto I = OpsAndLoops.begin();
+
+ // Expand the calculation of X pow N in the following manner:
+ // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
+ // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ auto E = I;
+ // Calculate how many times the same operand from the same loop is included
+ // into this power.
+ uint64_t Exponent = 0;
+ const uint64_t MaxExponent = UINT64_MAX >> 1;
+ // No one sane will ever try to calculate such huge exponents, but if we
+ // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
+ // below when the power of 2 exceeds our Exponent, and we want it to be
+ // 1u << 31 at most to not deal with unsigned overflow.
+ while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
+ ++Exponent;
+ ++E;
+ }
+ assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
+
+    // Calculate powers with exponents 1, 2, 4, 8, etc., and include the
+    // ones that are needed in the result.
+ Value *P = expandCodeFor(I->second, Ty);
+ Value *Result = nullptr;
+ if (Exponent & 1)
+ Result = P;
+ for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
+ P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
+ if (Exponent & BinExp)
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P,
+ SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true)
+ : P;
+ }
+
+ I = E;
+ assert(Result && "Nothing was expanded?");
+ return Result;
+ };
+
+ while (I != OpsAndLoops.end()) {
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = ExpandOpBinPowN();
+ } else if (I->second->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = ExpandOpBinPowN();
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ const APInt *RHS;
+ if (match(W, m_Power2(RHS))) {
+ // Canonicalize Prod*(1<<C) to Prod<<C.
+ assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+ auto NWFlags = S->getNoWrapFlags();
+ // clear nsw flag if shl will produce poison value.
+ if (RHS->logBase2() == RHS->getBitWidth() - 1)
+ NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
+ Prod = InsertBinop(Instruction::Shl, Prod,
+ ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
+ /*IsSafeToHoist*/ true);
+ } else {
+ Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
+ }
+ }
+ }
+
+ return Prod;
+}
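
ExpandOpBinPowN above is standard binary powering over the run of equal operands. A self-contained sketch of the same scheme on machine integers: for an exponent of 13 = 0b1101, the result X^1 * X^4 * X^8 is assembled from successive squarings.

    unsigned powBinary(unsigned X, unsigned N) {
      unsigned P = X, Result = 1;
      for (; N; N >>= 1) {
        if (N & 1)
          Result *= P; // this power-of-two exponent is part of the product
        P *= P;        // squares: X, X^2, X^4, X^8, ...
      }
      return Result;
    }
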
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getAPInt();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()),
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
+}
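
The power-of-two path above is plain strength reduction: a udiv by 2^k becomes a logical shift right by k, with APInt::logBase2 supplying k. A minimal check of the arithmetic:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    void udivToShift() {
      llvm::APInt RHS(32, 8);
      // udiv %x, 8 is emitted as lshr %x, 3.
      assert(RHS.isPowerOf2() && RHS.logBase2() == 3);
    }
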
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW)));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+ return false;
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT.dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ if (IncV == PN)
+ return true;
+
+ return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return nullptr;
+
+ switch (IncV->getOpcode()) {
+ default:
+ return nullptr;
+ // Check for a simple Add/Sub or GEP of a loop invariant step.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT.dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return nullptr;
+ }
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
+ for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
+ if (isa<Constant>(*I))
+ continue;
+ if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (!SE.DT.dominates(OInst, InsertPos))
+ return nullptr;
+ }
+ if (allowScale) {
+ // allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
+ if (IncV->getNumOperands() != 2)
+ return nullptr;
+ unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+ if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+ && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ return nullptr;
+ break;
+ }
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ }
+}
+
+/// If the insert point of the current builder or any of the builders on the
+/// stack of saved builders has 'I' as its insert point, update it to point to
+/// the instruction after 'I'. This is intended to be used when the instruction
+/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
+/// different block, the inconsistent insert point (with a mismatched
+/// Instruction and Block) can lead to an instruction being inserted in a block
+/// other than its parent.
+void SCEVExpander::fixupInsertPoints(Instruction *I) {
+ BasicBlock::iterator It(*I);
+ BasicBlock::iterator NewInsertPt = std::next(It);
+ if (Builder.GetInsertPoint() == It)
+ Builder.SetInsertPoint(&*NewInsertPt);
+ for (auto *InsertPtGuard : InsertPointGuards)
+ if (InsertPtGuard->GetInsertPoint() == It)
+ InsertPtGuard->SetInsertPoint(NewInsertPt);
+}
+
+/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+ if (SE.DT.dominates(IncV, InsertPos))
+ return true;
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (isa<PHINode>(InsertPos) ||
+ !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
+ return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for(;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT.dominates(IncV, InsertPos))
+ break;
+ }
+ for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
+ fixupInsertPoints(*I);
+ (*I)->moveBefore(InsertPos);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for(Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
+ }
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
+}
+
+/// Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+ Instruction *Pos, PHINode *LoopPhi) {
+ do {
+ if (DT->dominates(InstToHoist, Pos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ fixupInsertPoints(InstToHoist);
+ InstToHoist->moveBefore(Pos);
+ Pos = InstToHoist;
+ InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+ } while (InstToHoist != LoopPhi);
+}
+
+/// Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+ const SCEVAddRecExpr *Phi,
+ const SCEVAddRecExpr *Requested,
+ bool &InvertStep) {
+ Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+ Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+ if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+ return false;
+
+ // Try truncate it if necessary.
+ Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+ if (!Phi)
+ return false;
+
+ // Check whether truncation will help.
+ if (Phi == Requested) {
+ InvertStep = false;
+ return true;
+ }
+
+ // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+ if (SE.getAddExpr(Requested->getStart(),
+ SE.getNegativeSCEV(Requested)) == Phi) {
+ InvertStep = true;
+ return true;
+ }
+
+ return false;
+}
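
The inversion test relies on the identity {R,+,-1} == R - {0,+,1}: both sides evaluate to R - i at iteration i. A quick numeric check with an assumed start value of 10:

    #include <cassert>
    void checkInversionIdentity() {
      const int R = 10;
      for (int i = 0; i < 4; ++i)
        assert(R + (-1) * i == R - (0 + 1 * i)); // {R,+,-1} vs R - {0,+,1}
    }
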
+
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
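
Both predicates use the same widening trick: the increment is NSW (or NUW) exactly when sign- (or zero-)extending before the add agrees with extending after it in a type twice as wide. A scalar instance for the signed i8 case; the narrowing cast models the wraparound an i8 add performs (well-defined modulo 2^8 since C++20, and in practice before):

    #include <cstdint>
    bool incrementIsNSWAt(int8_t V, int8_t Step) {
      int16_t OpAfterExtend = int16_t(V) + int16_t(Step);         // extend, then add
      int16_t ExtendAfterOp = int16_t(int8_t(int16_t(V) + Step)); // add in i8, then extend
      return OpAfterExtend == ExtendAfterOp; // false iff the i8 add overflows
    }
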
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ Type *ExpandTy,
+ Type *IntTy,
+ Type *&TruncTy,
+ bool &InvertStep) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ PHINode *AddRecPhiMatch = nullptr;
+ Instruction *IncV = nullptr;
+ TruncTy = nullptr;
+ InvertStep = false;
+
+ // Only try partially matching SCEVs that need truncation and/or
+ // step-inversion if we know this loop is outside the current loop.
+ bool TryNonMatchingSCEV =
+ IVIncInsertLoop &&
+ SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ if (!PhiSCEV)
+ continue;
+
+ bool IsMatchingSCEV = PhiSCEV == Normalized;
+ // We only handle truncation and inversion of phi recurrences for the
+ // expanded expression if the expanded expression's loop dominates the
+ // loop we insert to. Check now, so we can bail out early.
+ if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+ continue;
+
+ // TODO: this could possibly be reworked to avoid this cast altogether.
+ Instruction *TempIncV =
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
+ if (!TempIncV)
+ continue;
+
+ // Check whether we can reuse this PHI node.
+ if (LSRMode) {
+ if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
+ continue;
+ if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
+ continue;
+ } else {
+ if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
+ continue;
+ }
+
+ // Stop if we have found an exact match SCEV.
+ if (IsMatchingSCEV) {
+ IncV = TempIncV;
+ TruncTy = nullptr;
+ InvertStep = false;
+ AddRecPhiMatch = &PN;
+ break;
+ }
+
+ // Try whether the phi can be translated into the requested form
+ // (truncated and/or offset by a constant).
+ if ((!TruncTy || InvertStep) &&
+ canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+ // Record the phi node. But don't stop; we might find an exact match
+ // later.
+ AddRecPhiMatch = &PN;
+ IncV = TempIncV;
+ TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+ }
+ }
+
+ if (AddRecPhiMatch) {
+ // Potentially, move the increment. We have made sure in
+ // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+ if (L == IVIncInsertLoop)
+ hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(AddRecPhiMatch);
+ // Remember the increment.
+ rememberInstruction(IncV);
+ return AddRecPhiMatch;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+ // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
+ // Expand code for the start value into the loop preheader.
+ assert(L->getLoopPreheader() &&
+ "Can't expand add recurrences without a loop preheader!");
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator());
+
+ // StartV must have been inserted into L's preheader to dominate the new
+ // phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
+ if (isa<OverflowingBinaryOperator>(IncV)) {
+ if (IncrementIsNUW)
+ cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
+ if (IncrementIsNSW)
+ cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = nullptr;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = nullptr;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ if (!Start->isZero()) {
+ // The normalization below assumes that Start is constant zero, so if
+ // it isn't, re-associate Start to PostLoopOffset.
+ assert(!PostLoopOffset && "Start non-zero but PostLoopOffset set?");
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ }
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(
+ Start, Step, Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
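+
+ // Conceptually, the original expression now satisfies
+ //   S == PostLoopOffset + PostLoopScale * Normalized
+ // (a null PostLoopOffset/PostLoopScale reads as 0 and 1, respectively),
+ // where Normalized is an addrec whose start and step both dominate the
+ // loop header; the two post-loop pieces are re-applied after expansion.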
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ // We can't use a pointer type for the addrec if the pointer type is
+ // non-integral.
+ Type *AddRecPHIExpandTy =
+ DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+
+ // In some cases, we decide to reuse an existing phi node but need to truncate
+ // it and/or invert the step.
+ Type *TruncTy = nullptr;
+ bool InvertStep = false;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
+ IntTy, TruncTy, InvertStep);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+
+ // For an expansion to use the postinc form, the client must call
+ // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+ // or dominated by IVIncInsertPos.
+ if (isa<Instruction>(Result) &&
+ !SE.DT.dominates(cast<Instruction>(Result),
+ &*Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside the loop whose operand is replaced
+ // during expansion with the value of the postinc user. Without
+ // fundamentally changing the way postinc users are tracked, the only
+ // remedy is inserting an extra IV increment. StepV might fold into
+ // PostLoopOffset, but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ SCEVInsertPointGuard Guard(Builder, this);
+ StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ }
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
+ }
+
+ // We have decided to reuse an induction variable of a dominating loop. Apply
+ // truncation and/or inversion of the step.
+ if (TruncTy) {
+ Type *ResTy = Result->getType();
+ // Normalize the result type.
+ if (ResTy != SE.getEffectiveSCEVType(ResTy))
+ Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+ // Truncate the result.
+ if (TruncTy != Result->getType()) {
+ Result = Builder.CreateTrunc(Result, TruncTy);
+ rememberInstruction(Result);
+ }
+ // Invert the result.
+ if (InvertStep) {
+ Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+ Result);
+ rememberInstruction(Result);
+ }
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ if (Result->getType()->isIntegerTy()) {
+ Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
+ Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
+ } else {
+ Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
+ }
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ // In canonical mode we compute the addrec as an expression of a canonical IV
+ // using evaluateAtIteration and expand the resulting SCEV expression. This
+ // way we avoid introducing new IVs to carry on the computation of the addrec
+ // throughout the loop.
+ //
+ // For nested addrecs evaluateAtIteration might need a canonical IV of a
+ // type wider than the addrec itself. Emitting a canonical IV of the
+ // proper type might produce non-legal types, for example expanding an i64
+ // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
+ // back to non-canonical mode for nested addrecs.
+ if (!CanonicalMode || (S->getNumOperands() > 2))
+ return expandAddRecExprLiterally(S);
+
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = nullptr;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is narrower.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ S->getNoWrapFlags(SCEV::FlagNW)));
+ BasicBlock::iterator NewInsertPt =
+ findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
+ &*NewInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
+ S->getNoWrapFlags(SCEV::FlagNW));
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ // Dig into the expression to find the pointer base for a GEP.
+ const SCEV *ExposedRest = Rest;
+ ExposePointerBase(Base, ExposedRest, SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ //
+ // The LHS and RHS values are factored out of the expand call to make the
+ // output independent of the argument evaluation order.
+ const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
+ const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
+ return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ &Header->front());
+ rememberInstruction(CanonicalIV);
+
+ SmallSet<BasicBlock *, 4> PredSeen;
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP).second) {
+ // There must be an incoming value for each predecessor, even the
+ // duplicates!
+ CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
+ continue;
+ }
+
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
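+ //
+ // For example, the quadratic addrec {0,+,1,+,1}<L> evaluated at the
+ // canonical IV i folds, via the binomial expansion in evaluateAtIteration,
+ // to i + i*(i-1)/2.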
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
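+/// Expand an smax over N operands as a right-to-left chain of icmp/select
+/// pairs; visitUMaxExpr, visitSMinExpr, and visitUMinExpr below share the
+/// same shape. For two i64 operands the emitted pattern is, schematically:
+///   %cmp  = icmp sgt i64 %a, %b
+///   %smax = select i1 %cmp, i64 %a, i64 %b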
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
+ Instruction *IP) {
+ setInsertPoint(IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+ScalarEvolution::ValueOffsetPair
+SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
+ // If the expansion is not in CanonicalMode, and the SCEV contains any
+ // sub scAddRecExpr type SCEV, the SCEV must be expanded literally.
+ if (CanonicalMode || !SE.containsAddRecurrence(S)) {
+ // If S is scConstant, it may be worse to reuse an existing Value.
+ if (S->getSCEVType() != scConstant && Set) {
+ // Choose a Value from the set which dominates the insertPt.
+ // insertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
+ for (auto const &VOPair : *Set) {
+ Value *V = VOPair.first;
+ ConstantInt *Offset = VOPair.second;
+ Instruction *EntInst = nullptr;
+ if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
+ S->getType() == V->getType() &&
+ EntInst->getFunction() == InsertPt->getFunction() &&
+ SE.DT.dominates(EntInst, InsertPt) &&
+ (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return {V, Offset};
+ }
+ }
+ }
+ return {nullptr, nullptr};
+}
+
+// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
+// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
+// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
+// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
+// the expansion will try to reuse Value from ExprValueMap, and only when it
+// fails, expand the SCEV literally.
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+
+ // We can move the insertion point only if there are no div or rem
+ // operations; otherwise we risk hoisting one past the zero-denominator
+ // check that guards it (e.g. a udiv guarded by an n != 0 test).
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
+ if (SafeToHoist(S)) {
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop()) {
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(InsertPt) ||
+ isa<DbgInfoIntrinsic>(InsertPt)))
+ InsertPt = &*std::next(InsertPt->getIterator());
+ break;
+ }
+ }
+ }
+
+ // IndVarSimplify sometimes sets the insertion point at the block start, even
+ // when there are PHIs at that point. We must correct for this.
+ if (isa<PHINode>(*InsertPt))
+ InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
+
+ // Check to see if we already expanded this here.
+ auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ SCEVInsertPointGuard Guard(Builder, this);
+ Builder.SetInsertPoint(InsertPt);
+
+ // Expand the expression into instructions.
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
+ Value *V = VO.first;
+
+ if (!V)
+ V = visit(S);
+ else if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
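+ // The reused value satisfies S == V - Offset, so materialize the
+ // subtraction as a GEP with a negative index. E.g. for an i32* V and
+ // Offset == 4 this emits "getelementptr i32, i32* %V, i64 -1"; offsets
+ // that are not a multiple of the element size take the i8* "uglygep"
+ // path below instead.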
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -Offset);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
+ }
+ }
+ // Remember the expanded value for this SCEV at this location.
+ //
+ // This is independent of PostIncLoops. The mapped value simply materializes
+ // the expression at this insertion point. If the mapped value happened to be
+ // a postinc expansion, it could be reused by a non-postinc user, but only if
+ // its insertion point was already at the head of the loop.
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
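+///
+/// The emitted IR has the shape (block and value names illustrative, i64
+/// chosen for concreteness):
+///   %indvar      = phi i64 [ 0, %preheader ], [ %indvar.next, %latch ]
+///   %indvar.next = add i64 %indvar, 1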
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ SCEVInsertPointGuard Guard(Builder, this);
+ PHINode *V =
+ cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
+
+ return V;
+}
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
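+///
+/// For example (illustrative), if the loop header carries both an i64 and
+/// an i32 phi for {0,+,1}<L>, the narrow phi is replaced with a trunc of
+/// the wide one, assuming the target reports that truncate as free.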
+unsigned
+SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ const TargetTransformInfo *TTI) {
+ // Find integer phis in order of increasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (PHINode &PN : L->getHeader()->phis())
+ Phis.push_back(&PN);
+
+ if (TTI)
+ llvm::sort(Phis, [](Value *LHS, Value *RHS) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+ return RHS->getType()->getPrimitiveSizeInBits() <
+ LHS->getType()->getPrimitiveSizeInBits();
+ });
+
+ unsigned NumElim = 0;
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ // Process phis from wide to narrow. Map wide phis to their truncation
+ // so narrow phis can reuse them.
+ for (PHINode *Phi : Phis) {
+ auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
+ if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
+ return V;
+ if (!SE.isSCEVable(PN->getType()))
+ return nullptr;
+ auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+ if (!Const)
+ return nullptr;
+ return Const->getValue();
+ };
+
+ // Fold constant phis. They may be congruent to other constant phis and
+ // would confuse the logic below that expects proper IVs.
+ if (Value *V = SimplifyPHINode(Phi)) {
+ if (V->getType() != Phi->getType())
+ continue;
+ Phi->replaceAllUsesWith(V);
+ DeadInsts.emplace_back(Phi);
+ ++NumElim;
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+ continue;
+ }
+
+ if (!SE.isSCEVable(Phi->getType()))
+ continue;
+
+ PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+ if (!OrigPhiRef) {
+ OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TTI &&
+ TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
+ continue;
+ }
+
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
+ continue;
+
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc = dyn_cast<Instruction>(
+ OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+ if (OrigInc && IsomorphicInc) {
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType() &&
+ !(ChainedPhis.count(Phi) ||
+ isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
+ (ChainedPhis.count(Phi) ||
+ isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
+ std::swap(OrigPhiRef, Phi);
+ std::swap(OrigInc, IsomorphicInc);
+ }
+ // Replacing the congruent phi is sufficient because acyclic
+ // redundancy elimination, CSE/GVN, should handle the
+ // rest. However, once SCEV proves that a phi is congruent,
+ // it's often the head of an IV user cycle that is isomorphic
+ // with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs
+ // can remove cycles that had postinc uses.
+ const SCEV *TruncExpr =
+ SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc &&
+ TruncExpr == SE.getSCEV(IsomorphicInc) &&
+ SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
+ hoistIVInc(OrigInc, IsomorphicInc)) {
+ DEBUG_WITH_TYPE(DebugType,
+ dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+ IP = &*PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.CreateTruncOrBitCast(
+ OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
+ DeadInsts.emplace_back(IsomorphicInc);
+ }
+ }
+ }
+ DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
+ << *Phi << '\n');
+ ++NumElim;
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
+ DeadInsts.emplace_back(Phi);
+ }
+ return NumElim;
+}
+
+Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ Optional<ScalarEvolution::ValueOffsetPair> VO =
+ getRelatedExistingExpansion(S, At, L);
+ if (VO && VO.getValue().second == nullptr)
+ return VO.getValue().first;
+ return nullptr;
+}
+
+Optional<ScalarEvolution::ValueOffsetPair>
+SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L) {
+ using namespace llvm::PatternMatch;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Look for a suitable value in simple conditions at the loop exits.
+ for (BasicBlock *BB : ExitingBlocks) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ m_BasicBlock(), m_BasicBlock())))
+ continue;
+
+ if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+ return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
+
+ if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+ return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
+ }
+
+ // Fall back to the logic expand uses for reusing a previous Value in
+ // ExprValueMap.
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
+ if (VO.first)
+ return VO;
+
+ // There is potential to make this significantly smarter, but this simple
+ // heuristic already gets some interesting cases.
+
+ // Cannot find a suitable value.
+ return None;
+}
+
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining,
+ const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed,
+ SmallVectorImpl<const SCEV *> &Worklist) {
+ if (BudgetRemaining < 0)
+ return true; // Already run out of budget, give up.
+
+ // Was the cost of expansion of this expression already accounted for?
+ if (!Processed.insert(S).second)
+ return false; // We have already accounted for this expression.
+
+ // If we can find an existing value for this scev available at the point "At"
+ // then consider the expression cheap.
+ if (getRelatedExistingExpansion(S, &At, L))
+ return false; // Consider the expression to be free.
+
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false; // Assume to be zero-cost.
+ }
+
+ TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_RecipThroughput;
+
+ if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
+ unsigned Opcode;
+ switch (S->getSCEVType()) {
+ case scTruncate:
+ Opcode = Instruction::Trunc;
+ break;
+ case scZeroExtend:
+ Opcode = Instruction::ZExt;
+ break;
+ case scSignExtend:
+ Opcode = Instruction::SExt;
+ break;
+ default:
+ llvm_unreachable("There are no other cast types.");
+ }
+ const SCEV *Op = CastExpr->getOperand();
+ BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
+ /*Src=*/Op->getType(), CostKind);
+ Worklist.emplace_back(Op);
+ return false; // Will answer upon next entry into this function.
+ }
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two count this as a logical right-shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
+ if (SC->getAPInt().isPowerOf2()) {
+ BudgetRemaining -=
+ TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
+ CostKind);
+ // Note that we don't count the cost of RHS, because it is a constant,
+ // and we consider those to be free. But if that changes, we would need
+ // to log2() it first before calling isHighCostExpansionHelper().
+ Worklist.emplace_back(UDivExpr->getLHS());
+ return false; // Will answer upon next entry into this function.
+ }
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, we need to account for its cost.
+
+ // At the beginning of this function we already tried to find an existing
+ // value for plain 'S'. Now try to look up 'S + 1' since it is a common
+ // pattern involving division. This is just a simple search heuristic.
+ if (getRelatedExistingExpansion(
+ SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
+ return false; // Consider it to be free.
+
+ // Need to count the cost of this UDiv.
+ BudgetRemaining -=
+ TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
+ CostKind);
+ Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
+ return false; // Will answer upon next entry into this function.
+ }
+
+ if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) {
+ Type *OpType = NAry->getType();
+
+ assert(NAry->getNumOperands() >= 2 &&
+ "Polynomial should be at least linear");
+
+ int AddCost =
+ TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
+ int MulCost =
+ TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
+
+ // In this polynomial, we may have some zero operands, and we shouldn't
+ // really charge for those. So how many non-zero coefficients are there?
+ int NumTerms = llvm::count_if(NAry->operands(),
+ [](const SCEV *S) { return !S->isZero(); });
+ assert(NumTerms >= 1 && "Polynomial should have at least one term.");
+ assert(!(*std::prev(NAry->operands().end()))->isZero() &&
+ "Last operand should not be zero");
+
+ // Much like with a normal add expr, the polynomial will require
+ // one less addition than the number of its terms.
+ BudgetRemaining -= AddCost * (NumTerms - 1);
+ if (BudgetRemaining < 0)
+ return true;
+
+ // Ignoring the constant term (operand 0), how many of the coefficients are u> 1?
+ int NumNonZeroDegreeNonOneTerms =
+ llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()),
+ [](const SCEV *S) {
+ auto *SConst = dyn_cast<SCEVConstant>(S);
+ return !SConst || SConst->getAPInt().ugt(1);
+ });
+ // Here, *each* one of those will require a multiplication.
+ BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms;
+ if (BudgetRemaining < 0)
+ return true;
+
+ // What is the degree of this polynomial?
+ int PolyDegree = NAry->getNumOperands() - 1;
+ assert(PolyDegree >= 1 && "Should be at least affine.");
+
+ // The final term will be:
+ // Op_{PolyDegree} * x ^ {PolyDegree}
+ // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
+ // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
+ // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
+ // FIXME: this is conservatively correct, but might be overly pessimistic.
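+ // E.g. for PolyDegree == 3 we charge two multiplies, which compute x^2
+ // and then x^3; the x^2 needed by the middle term comes along for free.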
+ BudgetRemaining -= MulCost * (PolyDegree - 1);
+ if (BudgetRemaining < 0)
+ return true;
+
+ // And finally, the operands themselves should fit within the budget.
+ Worklist.insert(Worklist.end(), NAry->operands().begin(),
+ NAry->operands().end());
+ return false; // So far so good, though ops may be too costly?
+ }
+
+ if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
+ Type *OpType = NAry->getType();
+
+ int PairCost;
+ switch (S->getSCEVType()) {
+ case scAddExpr:
+ PairCost =
+ TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
+ break;
+ case scMulExpr:
+ // TODO: this is very pessimistic cost modelling for Mul, because of the
+ // binary powering (Bin Pow) algorithm actually used by the expander;
+ // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+ PairCost =
+ TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
+ break;
+ case scSMaxExpr:
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr:
+ PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
+ CmpInst::makeCmpResultType(OpType),
+ CostKind) +
+ TTI.getCmpSelInstrCost(Instruction::Select, OpType,
+ CmpInst::makeCmpResultType(OpType),
+ CostKind);
+ break;
+ default:
+ llvm_unreachable("There are no other variants here.");
+ }
+
+ assert(NAry->getNumOperands() > 1 &&
+ "Nary expr should have more than 1 operand.");
+ // The simple n-ary expr will require one less op (or pair of ops)
+ // than the number of its terms.
+ BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1);
+ if (BudgetRemaining < 0)
+ return true;
+
+ // And finally, the operands themselves should fit within the budget.
+ Worklist.insert(Worklist.end(), NAry->operands().begin(),
+ NAry->operands().end());
+ return false; // So far so good, though ops may be too costly?
+ }
+
+ llvm_unreachable("No other scev expressions possible.");
+}
+
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+ Instruction *IP) {
+ assert(IP);
+ switch (Pred->getKind()) {
+ case SCEVPredicate::P_Union:
+ return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+ case SCEVPredicate::P_Equal:
+ return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+ case SCEVPredicate::P_Wrap: {
+ auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
+ return expandWrapPredicate(AddRecPred, IP);
+ }
+ }
+ llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+ Instruction *IP) {
+ Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+ Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+ Builder.SetInsertPoint(IP);
+ auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+ return I;
+}
+
+Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
+ Instruction *Loc, bool Signed) {
+ assert(AR->isAffine() && "Cannot generate RT check for "
+ "non-affine expression");
+
+ SCEVUnionPredicate Pred;
+ const SCEV *ExitCount =
+ SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
+
+ assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
+
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *Start = AR->getStart();
+
+ Type *ARTy = AR->getType();
+ unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
+
+ // The expression {Start,+,Step} has nusw/nssw if
+ // Step < 0, Start - |Step| * Backedge <= Start
+ // Step >= 0, Start + |Step| * Backedge > Start
+ // and |Step| * Backedge doesn't unsigned overflow.
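+ //
+ // The value built below is the *failure* check: it is true when the end
+ // value wraps around Start (or when the multiply itself overflows), so a
+ // caller is expected to branch to a fallback path when it is set.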
+
+ IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
+ Builder.SetInsertPoint(Loc);
+ Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
+
+ IntegerType *Ty =
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
+
+ Value *StepValue = expandCodeFor(Step, Ty, Loc);
+ Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
+
+ ConstantInt *Zero =
+ ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
+
+ Builder.SetInsertPoint(Loc);
+ // Compute |Step|
+ Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
+ Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
+
+ // Get the backedge-taken count and truncate or extend it to the AR type.
+ Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+
+ // Compute |Step| * Backedge
+ CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+
+ // Compute:
+ // Start + |Step| * Backedge < Start
+ // Start - |Step| * Backedge > Start
+ Value *Add = nullptr, *Sub = nullptr;
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
+ Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
+
+ Value *EndCompareGT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+
+ Value *EndCompareLT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+
+ // Select the answer based on the sign of Step.
+ Value *EndCheck =
+ Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
+
+ // If the backedge taken count type is larger than the AR type,
+ // check that we don't drop any bits by truncating it. If we are
+ // dropping bits, then we have overflow (unless the step is zero).
+ if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
+ auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
+ auto *BackedgeCheck =
+ Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
+ ConstantInt::get(Loc->getContext(), MaxVal));
+ BackedgeCheck = Builder.CreateAnd(
+ BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
+
+ EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
+ }
+
+ EndCheck = Builder.CreateOr(EndCheck, OfMul);
+ return EndCheck;
+}
+
+Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
+ Instruction *IP) {
+ const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
+ Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
+
+ // Add a check for NUSW
+ if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
+ NUSWCheck = generateOverflowCheck(A, IP, false);
+
+ // Add a check for NSSW
+ if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
+ NSSWCheck = generateOverflowCheck(A, IP, true);
+
+ if (NUSWCheck && NSSWCheck)
+ return Builder.CreateOr(NUSWCheck, NSSWCheck);
+
+ if (NUSWCheck)
+ return NUSWCheck;
+
+ if (NSSWCheck)
+ return NSSWCheck;
+
+ return ConstantInt::getFalse(IP->getContext());
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+ Instruction *IP) {
+ auto *BoolType = IntegerType::get(IP->getContext(), 1);
+ Value *Check = ConstantInt::getNullValue(BoolType);
+
+ // Loop over all checks in this set.
+ for (auto Pred : Union->getPredicates()) {
+ auto *NextCheck = expandCodeForPredicate(Pred, IP);
+ Builder.SetInsertPoint(IP);
+ Check = Builder.CreateOr(Check, NextCheck);
+ }
+
+ return Check;
+}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
+struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
+ bool IsUnsafe;
+
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+
+bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
+ ScalarEvolution &SE) {
+ if (!isSafeToExpand(S, SE))
+ return false;
+ // We have to prove that the expanded site of S dominates InsertionPoint.
+ // This is easy when not in the same block, but hard when S is an instruction
+ // to be expanded somewhere inside the same block as our insertion point.
+ // What we really need here is something analogous to an OrderedBasicBlock,
+ // but for the moment, we paper over the problem by handling two common,
+ // cheap-to-check cases.
+ if (SE.properlyDominates(S, InsertionPoint->getParent()))
+ return true;
+ if (SE.dominates(S, InsertionPoint->getParent())) {
+ if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
+ return true;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ for (const Value *V : InsertionPoint->operand_values())
+ if (V == U->getValue())
+ return true;
+ }
+ return false;
+}
+}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d93ca4f04cdbf..b450d71c996cb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -33,7 +33,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -134,6 +133,11 @@ static cl::opt<unsigned> MaxSpeculationDepth(
cl::desc("Limit maximum recursion depth when calculating costs of "
"speculatively executed instructions"));
+static cl::opt<int>
+MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
+ cl::desc("Max size of a block which is still considered "
+ "small enough to thread through"));
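+// Note: the threshold is adjustable from the command line, e.g.
+//   opt -simplifycfg -simplifycfg-max-small-block-size=20 ...
+// (the flag is cl::Hidden, so it is not listed by plain -help).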
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -192,20 +196,34 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
- bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
- bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
- bool SimplifySingleResume(ResumeInst *RI);
- bool SimplifyCommonResume(ResumeInst *RI);
- bool SimplifyCleanupReturn(CleanupReturnInst *RI);
- bool SimplifyUnreachable(UnreachableInst *UI);
- bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
- bool SimplifyIndirectBr(IndirectBrInst *IBI);
- bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
- bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool simplifySingleResume(ResumeInst *RI);
+ bool simplifyCommonResume(ResumeInst *RI);
+ bool simplifyCleanupReturn(CleanupReturnInst *RI);
+ bool simplifyUnreachable(UnreachableInst *UI);
+ bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool simplifyIndirectBr(IndirectBrInst *IBI);
+ bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
+ bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder);
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
+ bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI);
+ bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const TargetTransformInfo &TTI);
+ bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight, uint32_t FalseWeight);
+ bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL);
+ bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
+ bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
+ bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
+
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
@@ -317,7 +335,7 @@ static unsigned ComputeSpeculationCost(const User *I,
const TargetTransformInfo &TTI) {
assert(isSafeToSpeculativelyExecute(I) &&
"Instruction is not safe to speculatively execute!");
- return TTI.getUserCost(I);
+ return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}
/// If we have a merge point of an "if condition" as accepted above,
@@ -1235,8 +1253,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
/// guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI,
- const TargetTransformInfo &TTI) {
+bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
+ const TargetTransformInfo &TTI) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1287,6 +1305,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
return Changed;
+ // If either of the two call sites has the nomerge attribute, stop hoisting.
+ if (const auto *CB1 = dyn_cast<CallBase>(I1))
+ if (CB1->cannotMerge())
+ return Changed;
+ if (const auto *CB2 = dyn_cast<CallBase>(I2))
+ if (CB2->cannotMerge())
+ return Changed;
+
if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
// The debug location is an integral part of a debug info intrinsic
@@ -1444,6 +1470,13 @@ static bool isLifeTimeMarker(const Instruction *I) {
return false;
}
+// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
+// into variables.
+static bool replacingOperandWithVariableIsCheap(const Instruction *I,
+ int OpIdx) {
+ return !isa<IntrinsicInst>(I);
+}
+
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
@@ -1465,8 +1498,9 @@ static bool canSinkInstructions(
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
+ // If the instruction has the nomerge attribute, return false.
if (const auto *C = dyn_cast<CallBase>(I))
- if (C->isInlineAsm())
+ if (C->isInlineAsm() || C->cannotMerge())
return false;
// Each instruction must have zero or one use.
@@ -1521,7 +1555,8 @@ static bool canSinkInstructions(
return false;
for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
- if (I0->getOperand(OI)->getType()->isTokenTy())
+ Value *Op = I0->getOperand(OI);
+ if (Op->getType()->isTokenTy())
// Don't touch any operand of token type.
return false;
@@ -1530,7 +1565,8 @@ static bool canSinkInstructions(
return I->getOperand(OI) == I0->getOperand(OI);
};
if (!all_of(Insts, SameAsI0)) {
- if (!canReplaceOperandWithVariable(I0, OI))
+ if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
+ !canReplaceOperandWithVariable(I0, OI))
// We can't create a PHI from this GEP.
return false;
// Don't create indirect calls! The called value is the final operand.
@@ -1960,8 +1996,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
/// \endcode
///
/// \returns true if the conditional block is removed.
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const TargetTransformInfo &TTI) {
+bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const TargetTransformInfo &TTI) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -2110,9 +2146,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
// Metadata can be dependent on the condition we are hoisting above.
- // Conservatively strip all metadata on the instruction.
- for (auto &I : *ThenBB)
+ // Conservatively strip all metadata on the instruction. Drop the debug loc
+ // to avoid making it appear as if the condition is a constant, which would
+ // be misleading while debugging.
+ for (auto &I : *ThenBB) {
+ if (!SpeculatedStoreValue || &I != SpeculatedStore)
+ I.setDebugLoc(DebugLoc());
I.dropUnknownNonDebugMetadata();
+ }
// Hoist the instructions.
BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
@@ -2131,13 +2172,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
continue;
// Create a select whose true value is the speculatively executed value and
- // false value is the preexisting value. Swap them if the branch
+ // false value is the pre-existing value. Swap them if the branch
// destinations were inverted.
Value *TrueV = ThenV, *FalseV = OrigV;
if (Invert)
std::swap(TrueV, FalseV);
- Value *V = Builder.CreateSelect(
- BrCond, TrueV, FalseV, "spec.select", BI);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
PN.setIncomingValue(OrigI, V);
PN.setIncomingValue(ThenI, V);
}
@@ -2154,12 +2194,15 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
/// Return true if we can thread a branch across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
- unsigned Size = 0;
+ int Size = 0;
for (Instruction &I : BB->instructionsWithoutDebug()) {
- if (Size > 10)
+ if (Size > MaxSmallBlockSize)
return false; // Don't clone large BB's.
- ++Size;
+ // We will delete PHIs while threading, so PHIs should not be counted
+ // toward the block's size.
+ if (!isa<PHINode>(I))
+ ++Size;
// We can only support instructions that do not define values that are
// live outside of the current basic block.
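The counting rule introduced above, restated as a self-contained sketch (assuming MaxSmallBlockSize is the limit referenced in this hunk; its definition lies outside the excerpt):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Count only real, non-PHI instructions: PHIs are deleted while threading
    // and debug intrinsics never lower to machine code.
    static bool blockIsSmallEnough(llvm::BasicBlock *BB, int MaxSmallBlockSize) {
      int Size = 0;
      for (llvm::Instruction &I : BB->instructionsWithoutDebug())
        if (!llvm::isa<llvm::PHINode>(I) && ++Size > MaxSmallBlockSize)
          return false;
      return true;
    }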
@@ -2306,9 +2349,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// dependence information for this check, but simplifycfg can't keep it up
// to date, and this catches most of the cases we care about anyway.
BasicBlock *BB = PN->getParent();
- const Function *Fn = BB->getParent();
- if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
- return false;
BasicBlock *IfTrue, *IfFalse;
Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
@@ -2454,8 +2494,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
/// If we found a conditional branch that goes to two returning blocks,
/// try to merge them together into one return,
/// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
- IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
@@ -2531,8 +2571,8 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
(void)RI;
LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
- << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
- << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
+ << "\n " << *BI << "\nNewRet = " << *RI << "\nTRUEBLOCK: "
+ << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc);
EraseTerminatorAndDCECond(BI);
@@ -2588,6 +2628,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
const unsigned PredCount = pred_size(BB);
+ bool Changed = false;
+
Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
@@ -2611,17 +2653,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
}
// Quit if we can't remove this instruction.
if (!tryCSEWithPredecessor(Curr, PB))
- return false;
+ return Changed;
+ Changed = true;
}
}
if (!Cond)
- return false;
+ return Changed;
}
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
- return false;
+ return Changed;
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = ++Cond->getIterator();
@@ -2631,7 +2674,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
++CondIt;
if (&*CondIt != BI)
- return false;
+ return Changed;
// Only allow this transformation if computing the condition doesn't involve
// too many instructions and these involved instructions can be executed
@@ -2645,11 +2688,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
if (isa<DbgInfoIntrinsic>(I))
continue;
if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
- return false;
+ return Changed;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
if (User == nullptr || User->getParent() != BB)
- return false;
+ return Changed;
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
// to use any other instruction, User must be an instruction between next(I)
// and Cond.
@@ -2659,23 +2702,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
- return false;
+ return Changed;
}
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
if (CE->canTrap())
- return false;
+ return Changed;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
if (CE->canTrap())
- return false;
+ return Changed;
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
if (TrueDest == BB || FalseDest == BB)
- return false;
+ return Changed;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *PredBlock = *PI;
@@ -2715,6 +2758,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
}
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ Changed = true;
+
IRBuilder<> Builder(PBI);
// If we need to invert the condition in the pred block to match, do so now.
@@ -2744,6 +2789,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
if (isa<DbgInfoIntrinsic>(BonusInst))
continue;
Instruction *NewBonusInst = BonusInst->clone();
+
+ // When we fold the bonus instructions we want to make sure we
+ // reset their debug locations in order to avoid stepping into dead
+ // code caused by folding dead branches.
+ NewBonusInst->setDebugLoc(DebugLoc());
+
RemapInstruction(NewBonusInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
VMap[&*BonusInst] = NewBonusInst;
@@ -2763,6 +2814,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *CondInPred = Cond->clone();
+
+ // Reset the condition's debug location to avoid stepping into dead code
+ // created as the result of folding dead branches.
+ CondInPred->setDebugLoc(DebugLoc());
+
RemapInstruction(CondInPred, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
@@ -2877,13 +2933,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// could replace PBI's branch probabilities with BI's.
// Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB)
- if (isa<DbgInfoIntrinsic>(I))
- I.clone()->insertBefore(PBI);
+ for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I)) {
+ Instruction *NewI = I.clone();
+ RemapInstruction(NewI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NewI->insertBefore(PBI);
+ }
+ }
- return true;
+ return Changed;
}
- return false;
+ return Changed;
}
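The recurring edit throughout this function is a single pattern: the preparatory CSE may already have rewritten the IR, so every early exit must report that fact instead of returning false. A schematic sketch with hypothetical helpers (none of these names appear in the patch):

    static bool tryPreparatoryCSE();  // hypothetical: may rewrite the IR
    static bool preconditionsHold();  // hypothetical
    static void performFold();        // hypothetical

    static bool foldLikeTransform() {
      bool Changed = false;
      if (tryPreparatoryCSE())
        Changed = true;               // the IR was modified even if we bail
      if (!preconditionsHold())
        return Changed;               // previously a plain "return false"
      performFold();
      return true;
    }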
// If there is only one store in BB1 and BB2, return it, otherwise return
@@ -3024,7 +3085,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
return false; // Not in white-list - not worthwhile folding.
// And finally, if this is a non-free instruction that we are okay
// speculating, ensure that we consider the speculation budget.
- BudgetRemaining -= TTI.getUserCost(&I);
+ BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
if (BudgetRemaining < 0)
return false; // Eagerly refuse to fold as soon as we're out of budget.
}
@@ -3086,29 +3147,11 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
PStore->getAAMetadata(AAMD, /*Merge=*/false);
PStore->getAAMetadata(AAMD, /*Merge=*/true);
SI->setAAMetadata(AAMD);
- unsigned PAlignment = PStore->getAlignment();
- unsigned QAlignment = QStore->getAlignment();
- unsigned TypeAlignment =
- DL.getABITypeAlignment(SI->getValueOperand()->getType());
- unsigned MinAlignment;
- unsigned MaxAlignment;
- std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment);
// Choose the minimum alignment. If we could prove both stores execute, we
// could use the larger one. In this case, though, we only know that one of the
// stores executes. And we don't know it's safe to take the alignment from a
// store that doesn't execute.
- if (MinAlignment != 0) {
- // Choose the minimum of all non-zero alignments.
- SI->setAlignment(Align(MinAlignment));
- } else if (MaxAlignment != 0) {
- // Choose the minimal alignment between the non-zero alignment and the ABI
- // default alignment for the type of the stored value.
- SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment)));
- } else {
- // If both alignments are zero, use ABI default alignment for the type of
- // the stored value.
- SI->setAlignment(Align(TypeAlignment));
- }
+ SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
QStore->eraseFromParent();
PStore->eraseFromParent();
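This collapse is possible because LLVM 11's llvm::Align always holds a known power of two; the old zero-means-unknown encoding is gone, so the conservative merge reduces to a plain minimum. A tiny sketch:

    #include <algorithm>
    #include "llvm/Support/Alignment.h"

    // Align is totally ordered, so std::min picks the weaker guarantee.
    llvm::Align Merged = std::min(llvm::Align(8), llvm::Align(2)); // Align(2)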
@@ -3514,10 +3557,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
-static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
- BasicBlock *TrueBB, BasicBlock *FalseBB,
- uint32_t TrueWeight,
- uint32_t FalseWeight) {
+bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
+ Value *Cond, BasicBlock *TrueBB,
+ BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight) {
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -3577,7 +3621,8 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
// (switch (select cond, X, Y)) on constant X, Y
// with a branch - conditional if X and Y lead to distinct BBs,
// unconditional otherwise.
-static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
+ SelectInst *Select) {
// Check for constant integer values in the select.
ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
@@ -3613,7 +3658,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// blockaddress(@fn, BlockB)))
// with
// (br cond, BlockA, BlockB).
-static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
+ SelectInst *SI) {
// Check that both operands of the select are block addresses.
BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
@@ -3748,8 +3794,9 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
- const DataLayout &DL) {
+bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
+ IRBuilder<> &Builder,
+ const DataLayout &DL) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond)
return false;
@@ -3863,19 +3910,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
return true;
}
-bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
if (isa<PHINode>(RI->getValue()))
- return SimplifyCommonResume(RI);
+ return simplifyCommonResume(RI);
else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
RI->getValue() == RI->getParent()->getFirstNonPHI())
// The resume must unwind the exception that caused control to branch here.
- return SimplifySingleResume(RI);
+ return simplifySingleResume(RI);
return false;
}
// Simplify resume that is shared by several landing pads (phi of landing pad).
-bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
+bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
// Check that there are no other instructions except for debug intrinsics
@@ -3953,18 +4000,38 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
return !TrivialUnwindBlocks.empty();
}
+// Check whether the cleanup block is empty apart from benign intrinsics.
+static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) {
+ BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator();
+ while (++I != E) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
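An illustration of what the factored-out helper accepts, as hypothetical IR in comments (not from the patch):

    // A cleanup block that counts as empty:
    //   %lp = landingpad { i8*, i32 } cleanup
    //   call void @llvm.dbg.value(...)          ; benign
    //   call void @llvm.lifetime.end.p0i8(...)  ; benign
    //   resume { i8*, i32 } %lp
    // Any other instruction between the pad and the terminator makes
    // isCleanupBlockEmpty return false.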
// Simplify resume that is only used by a single (non-phi) landing pad.
-bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
+bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
assert(RI->getValue() == LPInst &&
"Resume must unwind the exception that caused control to here");
// Check that there are no other instructions except for debug intrinsics.
- BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I))
- return false;
+ if (!isCleanupBlockEmpty(LPInst, RI))
+ return false;
// Turn all invokes that unwind here into calls and delete the basic block.
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
@@ -4000,23 +4067,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
return false;
// Check that there are no other instructions except for benign intrinsics.
- BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
- while (++I != E) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II)
- return false;
-
- Intrinsic::ID IntrinsicID = II->getIntrinsicID();
- switch (IntrinsicID) {
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::lifetime_end:
- break;
- default:
- return false;
- }
- }
+ if (!isCleanupBlockEmpty(CPInst, RI))
+ return false;
// If the cleanup return we are simplifying unwinds to the caller, this will
// set UnwindDest to nullptr.
@@ -4083,9 +4135,10 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
// The iterator must be incremented here because the instructions are
// being moved to another block.
PHINode *PN = cast<PHINode>(I++);
- if (PN->use_empty())
- // If the PHI node has no uses, just leave it. It will be erased
- // when we erase BB below.
+ if (PN->use_empty() || !PN->isUsedOutsideOfBlock(BB))
+ // If the PHI node has no uses or all of its uses are in this basic
+ // block (meaning they are debug or lifetime intrinsics), just leave
+ // it. It will be erased when we erase BB below.
continue;
// Otherwise, sink this PHI node into UnwindDest.
@@ -4148,7 +4201,7 @@ static bool mergeCleanupPad(CleanupReturnInst *RI) {
return true;
}
-bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
// It is possible to transiently have an undef cleanuppad operand because we
// have deleted some, but not all, dead blocks.
// Eventually, this block will be deleted.
@@ -4164,7 +4217,7 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
return false;
}
-bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator())
return false;
@@ -4218,7 +4271,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
return false;
}
-bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
bool Changed = false;
@@ -4393,7 +4446,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch) {
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
+ IRBuilder<> &Builder) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
bool HasDefault =
@@ -5689,7 +5743,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
if (isValueEqualityComparison(SI)) {
@@ -5740,7 +5794,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
return false;
}
-bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
BasicBlock *BB = IBI->getParent();
bool Changed = false;
@@ -5855,7 +5909,12 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
return false;
}
-bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
+bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
+ return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
+ : simplifyCondBranch(Branch, Builder);
+}
+
+bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
BasicBlock *Succ = BI->getSuccessor(0);
@@ -5916,10 +5975,9 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
return PredPred;
}
-bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
- const Function *Fn = BB->getParent();
- if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
+ if (!Options.SimplifyCondBranch)
return false;
// Conditional branch
@@ -6064,9 +6122,9 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
SI->getPointerOperand() == I;
// A call to null is undefined.
- if (auto CS = CallSite(Use))
- return !NullPointerIsDefined(CS->getFunction()) &&
- CS.getCalledValue() == I;
+ if (auto *CB = dyn_cast<CallBase>(Use))
+ return !NullPointerIsDefined(CB->getFunction()) &&
+ CB->getCalledOperand() == I;
}
return false;
}
@@ -6133,39 +6191,38 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
IRBuilder<> Builder(BB);
- // If there is a trivial two-entry PHI node in this basic block, and we can
- // eliminate it, do so now.
- if (auto *PN = dyn_cast<PHINode>(BB->begin()))
- if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
-
- Builder.SetInsertPoint(BB->getTerminator());
- if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (BI->isUnconditional()) {
- if (SimplifyUncondBranch(BI, Builder))
- return true;
- } else {
- if (SimplifyCondBranch(BI, Builder))
- return true;
- }
- } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (SimplifyReturn(RI, Builder))
- return true;
- } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
- if (SimplifyResume(RI, Builder))
- return true;
- } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
- if (SimplifyCleanupReturn(RI))
- return true;
- } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (SimplifySwitch(SI, Builder))
- return true;
- } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {
- if (SimplifyUnreachable(UI))
- return true;
- } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {
- if (SimplifyIndirectBr(IBI))
- return true;
+ if (Options.FoldTwoEntryPHINode) {
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (auto *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
+ }
+
+ Instruction *Terminator = BB->getTerminator();
+ Builder.SetInsertPoint(Terminator);
+ switch (Terminator->getOpcode()) {
+ case Instruction::Br:
+ Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
+ break;
+ case Instruction::Ret:
+ Changed |= simplifyReturn(cast<ReturnInst>(Terminator), Builder);
+ break;
+ case Instruction::Resume:
+ Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
+ break;
+ case Instruction::CleanupRet:
+ Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
+ break;
+ case Instruction::Switch:
+ Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
+ break;
+ case Instruction::Unreachable:
+ Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
+ break;
+ case Instruction::IndirectBr:
+ Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
+ break;
}
return Changed;
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index cbb114f9a47aa..d3d0c33419085 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -27,6 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
@@ -54,6 +54,7 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
+ const TargetTransformInfo *TTI;
SCEVExpander &Rewriter;
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
@@ -61,10 +62,11 @@ namespace {
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SCEVExpander &Rewriter,
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SCEVExpander &Rewriter,
SmallVectorImpl<WeakTrackingVH> &Dead)
- : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead),
- Changed(false) {
+ : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
+ DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -655,7 +657,7 @@ static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
return Hint;
}
-/// Replace the UseInst with a constant if possible.
+/// Replace the UseInst with a loop invariant expression if it is safe.
bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
if (!SE->isSCEVable(I->getType()))
return false;
@@ -667,10 +669,17 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
return false;
// Do not generate something ridiculous even if S is loop invariant.
- if (Rewriter.isHighCostExpansion(S, L, I))
+ if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
return false;
auto *IP = GetLoopInvariantInsertPosition(L, I);
+
+ if (!isSafeToExpandAt(S, IP, *SE)) {
+ LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
+ << " with non-speculable loop invariant: " << *S << '\n');
+ return false;
+ }
+
auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
I->replaceAllUsesWith(Invariant);
@@ -931,10 +940,11 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
SCEVExpander &Rewriter, IVVisitor *V) {
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter,
- Dead);
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
+ Rewriter, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
@@ -942,14 +952,16 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead) {
SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter);
+ Changed |=
+ simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
}
return Changed;
}
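For callers, the visible effect of this file's changes is one extra parameter threaded through to the expansion-cost check. A hypothetical call site against the LLVM 11 headers:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/ValueHandle.h"
    #include "llvm/Transforms/Utils/SimplifyIndVar.h"

    // Hypothetical wrapper; only the TTI argument is new in this signature.
    static bool runOnLoop(llvm::Loop *L, llvm::ScalarEvolution *SE,
                          llvm::DominatorTree *DT, llvm::LoopInfo *LI,
                          const llvm::TargetTransformInfo *TTI) {
      llvm::SmallVector<llvm::WeakTrackingVH, 16> DeadInsts;
      return llvm::simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
    }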
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index fa3a9d21f3dfb..cfcc3454a2102 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -138,28 +138,6 @@ static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
return ConstantInt::get(CI->getType(), Result);
}
-static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- CallInst *FOpen = dyn_cast<CallInst>(File);
- if (!FOpen)
- return false;
-
- Function *InnerCallee = FOpen->getCalledFunction();
- if (!InnerCallee)
- return false;
-
- LibFunc Func;
- if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
- Func != LibFunc_fopen)
- return false;
-
- inferLibFuncAttributes(*CI->getCalledFunction(), *TLI);
- if (PointerMayBeCaptured(File, true, true))
- return false;
-
- return true;
-}
-
static bool isOnlyUsedInComparisonWithZero(Value *V) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -177,8 +155,7 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
if (!isOnlyUsedInComparisonWithZero(CI))
return false;
- if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len),
- DL))
+ if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
return false;
if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
@@ -252,7 +229,7 @@ static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> A
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
-Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -274,7 +251,7 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
Value *DstLen = emitStrLen(Dst, B, DL, TLI);
@@ -289,12 +266,12 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(
- CpyDst, Align::None(), Src, Align::None(),
+ CpyDst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
return Dst;
}
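Two mechanical migrations run through the rest of this file: IRBuilder<> parameters become IRBuilderBase so the simplifier works with any builder flavor, and Align::None() becomes Align(1), LLVM 11's spelling for a one-byte (i.e. no extra) alignment guarantee. The builder-call shape, as a sketch with assumed operands:

    #include "llvm/IR/IRBuilder.h"

    // Hypothetical wrapper; Dst, Src and Len are stand-ins, not patch names.
    static llvm::CallInst *emitByteAlignedMemCpy(llvm::IRBuilderBase &B,
                                                 llvm::Value *Dst,
                                                 llvm::Value *Src,
                                                 llvm::Value *Len) {
      return B.CreateMemCpy(Dst, llvm::Align(1), Src, llvm::Align(1), Len);
    }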
-Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
// Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -337,7 +314,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
return emitStrLenMemCpy(Src, Dst, SrcLen, B);
}
-Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
@@ -382,7 +359,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
}
-Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
annotateNonNullBasedOnAccess(CI, 0);
@@ -410,7 +387,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
}
-Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -465,7 +442,7 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
Value *Str1P = CI->getArgOperand(0);
Value *Str2P = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
@@ -533,7 +510,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
Value *Src = CI->getArgOperand(0);
ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
uint64_t SrcLen = GetStringLength(Src);
@@ -546,7 +523,7 @@ Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
@@ -562,13 +539,13 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI =
- B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+ B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
return Dst;
}
-Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
@@ -590,13 +567,12 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- CallInst *NewCI =
- B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), LenV);
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
return DstEnd;
}
-Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -626,7 +602,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align::None());
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align(1));
AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
@@ -639,13 +615,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
- CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
return Dst;
}
-Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
+Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
unsigned CharSize) {
Value *Src = CI->getArgOperand(0);
@@ -736,14 +712,14 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
if (Value *V = optimizeStringLength(CI, B, 8))
return V;
annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
-Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
Module &M = *CI->getModule();
unsigned WCharSize = TLI->getWCharSize(M) * 8;
// We cannot perform this optimization without wchar_size metadata.
@@ -753,7 +729,7 @@ Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
return optimizeStringLength(CI, B, WCharSize);
}
-Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -780,7 +756,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
@@ -791,7 +767,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -812,7 +788,7 @@ Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -836,7 +812,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
@@ -893,13 +869,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
if (isKnownNonZero(CI->getOperand(2), DL))
annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
-Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, 0, Size, DL);
@@ -988,7 +964,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
}
static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
- uint64_t Len, IRBuilder<> &B,
+ uint64_t Len, IRBuilderBase &B,
const DataLayout &DL) {
if (Len == 0) // memcmp(s1,s2,0) -> 0
return Constant::getNullValue(CI->getType());
@@ -1065,7 +1041,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
// Most simplifications for memcmp also apply to bcmp.
Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
@@ -1088,7 +1064,7 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
return nullptr;
}
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
return V;
@@ -1105,24 +1081,24 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
return optimizeMemCmpBCmpCommon(CI, B);
}
-Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
if (isa<IntrinsicInst>(CI))
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
- CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
- CI->getArgOperand(1), Align::None(), Size);
+ CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
-Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
@@ -1146,8 +1122,7 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) {
size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
if (Pos == StringRef::npos) {
if (N->getZExtValue() <= SrcStr.size()) {
- B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
- CI->getArgOperand(3));
+ B.CreateMemCpy(Dst, Align(1), Src, Align(1), CI->getArgOperand(3));
return Constant::getNullValue(CI->getType());
}
return nullptr;
@@ -1156,37 +1131,37 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) {
Value *NewN =
ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
// memccpy -> llvm.memcpy
- B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), NewN);
+ B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN);
return Pos + 1 <= N->getZExtValue()
? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
: Constant::getNullValue(CI->getType());
}
-Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
Value *Dst = CI->getArgOperand(0);
Value *N = CI->getArgOperand(2);
// mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
- CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), CI->getArgOperand(1),
- Align::None(), N);
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
NewCI->setAttributes(CI->getAttributes());
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
-Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
if (isa<IntrinsicInst>(CI))
return nullptr;
// memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
- CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
- CI->getArgOperand(1), Align::None(), Size);
+ CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
+Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilderBase &B) {
// This has to be a memset of zeros (bzero).
auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
if (!FillValue || FillValue->getZExtValue() != 0)
@@ -1229,7 +1204,7 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, 0, Size, DL);
if (isa<IntrinsicInst>(CI))
@@ -1240,13 +1215,12 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- CallInst *NewCI =
- B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align::None());
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
-Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
@@ -1258,9 +1232,10 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
//===----------------------------------------------------------------------===//
// Replace a libcall \p CI with a call to intrinsic \p IID
-static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
+ Intrinsic::ID IID) {
// Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
Module *M = CI->getModule();
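The FastMathFlagGuard idiom used here (and in most optimize* routines below) is an RAII saver: flags copied from the original call apply only while the replacement IR is built. A sketch, with B and CI assumed from the surrounding code:

    #include "llvm/IR/IRBuilder.h"

    static void withCallFMF(llvm::IRBuilderBase &B, llvm::CallInst *CI) {
      llvm::IRBuilderBase::FastMathFlagGuard Guard(B); // saves current flags
      B.setFastMathFlags(CI->getFastMathFlags()); // new IR inherits CI's FMF
      // ... create the replacement instructions here ...
    }   // Guard restores the builder's previous flags on scope exit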
@@ -1294,7 +1269,7 @@ static Value *valueHasFloatPrecision(Value *Val) {
}
/// Shrink double -> float functions.
-static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
+static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
bool isBinary, bool isPrecise = false) {
Function *CalleeFn = CI->getCalledFunction();
if (!CI->getType()->isDoubleTy() || !CalleeFn)
@@ -1333,7 +1308,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
}
// Propagate the math semantics from the current function to the new function.
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
// g((double) float) -> (double) gf(float)
@@ -1352,24 +1327,24 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
}
/// Shrink double -> float for unary functions.
-static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
bool isPrecise = false) {
return optimizeDoubleFP(CI, B, false, isPrecise);
}
/// Shrink double -> float for binary functions.
-static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
bool isPrecise = false) {
return optimizeDoubleFP(CI, B, true, isPrecise);
}
// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
-Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
if (!CI->isFast())
return nullptr;
// Propagate fast-math flags from the existing call to new instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
Value *Real, *Imag;
@@ -1393,11 +1368,11 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
}
static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (!isa<FPMathOperator>(Call))
return nullptr;
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Call->getFastMathFlags());
// TODO: Can this be shared to also handle LLVM intrinsics?
@@ -1427,7 +1402,7 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
return nullptr;
}
-static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
+static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) {
// Multiplications calculated using Addition Chains.
// Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
@@ -1453,7 +1428,7 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
}
// Return a properly extended 32-bit integer if the operation is an itofp.
-static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) {
+static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B) {
if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
Value *Op = cast<Instruction>(I2F)->getOperand(0);
// Make sure that the exponent fits inside an int32_t,
@@ -1471,9 +1446,9 @@ static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) {
/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
-Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
- AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ AttributeList Attrs; // Attributes are only meaningful on the original call
Module *Mod = Pow->getModule();
Type *Ty = Pow->getType();
bool Ignored;
@@ -1588,9 +1563,14 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
LibFunc_exp10l, B, Attrs);
- // pow(n, x) -> exp2(log2(n) * x)
- if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() &&
- Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) {
+ // pow(x, y) -> exp2(log2(x) * y)
+ if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
+ !BaseF->isNegative()) {
+ // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
+ // Luckily optimizePow has already handled the x == 1 case.
+ assert(!match(Base, m_FPOne()) &&
+ "pow(1.0, y) should have been simplified earlier!");
+
Value *Log = nullptr;
if (Ty->isFloatTy())
Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
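The excluded base-1 case can be checked with plain libm arithmetic, independent of LLVM:

    #include <cmath>
    #include <cstdio>

    int main() {
      // exp2(log2(x) * y) matches pow(x, y) for finite positive x != 1, but:
      double viaExp2 = std::exp2(std::log2(1.0) * INFINITY); // 0*inf -> NaN
      double direct = std::pow(1.0, INFINITY);               // 1.0 per C99
      std::printf("%f vs %f\n", viaExp2, direct);            // nan vs 1.000000
      return 0;
    }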
@@ -1612,7 +1592,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
}
static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
- Module *M, IRBuilder<> &B,
+ Module *M, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
// If errno is never set, then use the intrinsic for sqrt().
if (NoErrno) {
@@ -1633,9 +1613,9 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
}
/// Use square root in place of pow(x, +/-0.5).
-Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
- AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ AttributeList Attrs; // Attributes are only meaningful on the original call
Module *Mod = Pow->getModule();
Type *Ty = Pow->getType();
@@ -1676,13 +1656,13 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
}
static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
Value *Args[] = {Base, Expo};
Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
return B.CreateCall(F, Args);
}
-Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
Value *Base = Pow->getArgOperand(0);
Value *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
@@ -1693,12 +1673,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
bool AllowApprox = Pow->hasApproxFunc();
bool Ignored;
- // Bail out if simplifying libcalls to pow() is disabled.
- if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
- return nullptr;
-
// Propagate the math semantics from the call to any created instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Pow->getFastMathFlags());
// Shrink pow() to powf() if the arguments are single precision,
@@ -1748,7 +1724,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// be different) and it should also consider optimizing for size.
APFloat LimF(ExpoF->getSemantics(), 33),
ExpoA(abs(*ExpoF));
- if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+ if (ExpoA < LimF) {
// This transformation applies to integer or integer+0.5 exponents only.
// For integer+0.5, we create a sqrt(Base) call.
Value *Sqrt = nullptr;
@@ -1807,8 +1783,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
return Shrunk;
}
-Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
+ AttributeList Attrs; // Attributes are only meaningful on the original call
StringRef Name = Callee->getName();
Value *Ret = nullptr;
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
@@ -1825,13 +1802,13 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
if (Value *Exp = getIntToFPVal(Op, B))
return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
- B, CI->getCalledFunction()->getAttributes());
+ B, Attrs);
}
return Ret;
}
-Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
// If we can shrink the call to a float function rather than a double
// function, do that first.
Function *Callee = CI->getCalledFunction();
@@ -1847,7 +1824,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
// "Ideally, fmax would be sensitive to the sign of zero, for example
// fmax(-0.0, +0.0) would return +0; however, implementation in software
// might be impractical."
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
FastMathFlags FMF = CI->getFastMathFlags();
FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
@@ -1858,9 +1835,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
-Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
Function *LogFn = Log->getCalledFunction();
- AttributeList Attrs = LogFn->getAttributes();
+ AttributeList Attrs; // Attributes are only meaningful on the original call
StringRef LogNm = LogFn->getName();
Intrinsic::ID LogID = LogFn->getIntrinsicID();
Module *Mod = Log->getModule();
@@ -1963,12 +1940,12 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
} else
return Ret;
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(FastMathFlags::getFast());
Intrinsic::ID ArgID = Arg->getIntrinsicID();
LibFunc ArgLb = NotLibFunc;
- TLI->getLibFunc(Arg, ArgLb);
+ TLI->getLibFunc(*Arg, ArgLb);
// log(pow(x,y)) -> y*log(x)
if (ArgLb == PowLb || ArgID == Intrinsic::pow) {
@@ -2010,7 +1987,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
return Ret;
}
-Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
// TODO: Once we have a way (other than checking for the existence of the
@@ -2058,7 +2035,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
// Fast math flags for any created instructions should match the sqrt
// and multiply.
- IRBuilder<>::FastMathFlagGuard Guard(B);
+ IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(I->getFastMathFlags());
// If we found a repeated factor, hoist it out of the square root and
@@ -2079,7 +2056,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
}
// TODO: Generalize to handle any trig function and its inverse.
-Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
@@ -2116,7 +2093,7 @@ static bool isTrigLibCall(CallInst *CI) {
CI->hasFnAttr(Attribute::ReadNone);
}
-static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
Value *&SinCos) {
Type *ArgTy = Arg->getType();
@@ -2131,7 +2108,7 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
// x86_64 can't use {float, float} since that would be returned in both
// xmm0 and xmm1, which isn't what a real struct would do.
ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2))
: static_cast<Type *>(StructType::get(ArgTy, ArgTy));
} else {
Name = "__sincospi_stret";
@@ -2166,7 +2143,7 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
}
}
-Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
@@ -2247,7 +2224,7 @@ void LibCallSimplifier::classifyArgUse(
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
-Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
@@ -2261,7 +2238,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
return B.CreateSelect(Cond, V, B.getInt32(0));
}
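A plain C++ model of the ffs fold above (model name hypothetical; __builtin_ctz is the GCC/Clang analogue of llvm.cttz, and the x != 0 select guards its undefined x == 0 case):

    #include <cstdint>
    int ffs_model(uint32_t x) { return x ? __builtin_ctz(x) + 1 : 0; }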
-Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
// fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
@@ -2273,7 +2250,7 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
return B.CreateIntCast(V, CI->getType(), false);
}
-Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
// abs(x) -> x <s 0 ? -x : x
// The negation has 'nsw' because abs of INT_MIN is undefined.
Value *X = CI->getArgOperand(0);
@@ -2282,7 +2259,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
return B.CreateSelect(IsNeg, NegX, X);
}
-Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
@@ -2290,20 +2267,20 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
return B.CreateZExt(Op, CI->getType());
}
-Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
return B.CreateZExt(Op, CI->getType());
}
-Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
// toascii(c) -> c & 0x7f
return B.CreateAnd(CI->getArgOperand(0),
ConstantInt::get(CI->getType(), 0x7F));
}
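C++ models of the four integer folds above (names illustrative). The unsigned comparisons are the source-level spelling of the IR's ULT, and the negation in abs carries 'nsw' because -INT_MIN overflows:

    int  abs_model(int x)     { return x < 0 ? -x : x; }
    bool isdigit_model(int c) { return (unsigned)(c - '0') < 10u; }
    bool isascii_model(int c) { return (unsigned)c < 128u; }
    int  toascii_model(int c) { return c & 0x7f; }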
-Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
@@ -2311,7 +2288,7 @@ Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) {
return convertStrToNumber(CI, Str, 10);
}
-Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
@@ -2332,7 +2309,7 @@ Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) {
static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
-Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
+Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
int StreamArg) {
Function *Callee = CI->getCalledFunction();
// Error reporting calls should be cold, mark them as such.
@@ -2372,7 +2349,7 @@ static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
return GV->getName() == "stderr";
}
-Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
@@ -2425,7 +2402,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
@@ -2462,7 +2439,8 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -2477,8 +2455,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
B.CreateMemCpy(
- CI->getArgOperand(0), Align::None(), CI->getArgOperand(1),
- Align::None(),
+ CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
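What this fold does at the source level when the format string is constant and contains no '%' (a sketch with an invented format string):

    #include <cstring>
    int sprintf_hello_model(char *dst) {
      std::memcpy(dst, "hello", 6); // strlen(fmt) + 1 copies the NUL
      return 5;                     // sprintf returns bytes written, sans NUL
    }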
@@ -2515,8 +2492,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Value *IncLen =
B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
- Align::None(), IncLen);
+ B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(2),
+ Align(1), IncLen);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -2524,7 +2501,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
if (Value *V = optimizeSPrintFString(CI, B)) {
@@ -2560,7 +2537,8 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
// Check for size
ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!Size)
@@ -2587,8 +2565,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
// snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
// strlen(fmt)+1)
B.CreateMemCpy(
- CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
- Align::None(),
+ CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -2629,9 +2606,8 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
else if (N < Str.size() + 1)
return nullptr;
- B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(3),
- Align::None(),
- ConstantInt::get(CI->getType(), Str.size() + 1));
+ B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(3),
+ Align(1), ConstantInt::get(CI->getType(), Str.size() + 1));
// The snprintf result is the unincremented number of bytes in the string.
return ConstantInt::get(CI->getType(), Str.size());
@@ -2640,7 +2616,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
if (Value *V = optimizeSnPrintFString(CI, B)) {
return V;
}
@@ -2650,7 +2626,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
optimizeErrorReporting(CI, B, 0);
// All the optimizations depend on the format string.
@@ -2699,7 +2676,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
if (Value *V = optimizeFPrintFString(CI, B)) {
@@ -2734,7 +2711,7 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
optimizeErrorReporting(CI, B, 3);
// Get the element size and count.
@@ -2757,15 +2734,10 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
}
}
- if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
- return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
- TLI);
-
return nullptr;
}
-Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
optimizeErrorReporting(CI, B, 1);
// Don't rewrite fputs to fwrite when optimising for size because fwrite
@@ -2776,15 +2748,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
if (OptForSize)
return nullptr;
- // Check if has any use
- if (!CI->use_empty()) {
- if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
- return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
- TLI);
- else
- // We can't optimize if return value is used.
- return nullptr;
- }
+ // We can't optimize if return value is used.
+ if (!CI->use_empty())
+ return nullptr;
// fputs(s,F) --> fwrite(s,strlen(s),1,F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
@@ -2798,41 +2764,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
CI->getArgOperand(1), B, DL, TLI);
}
-Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) {
- optimizeErrorReporting(CI, B, 1);
-
- if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
- return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
- TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI))
- return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI))
- return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
- return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
- TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
annotateNonNullBasedOnAccess(CI, 0);
if (!CI->use_empty())
return nullptr;
@@ -2846,11 +2778,10 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
// bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
- return B.CreateMemMove(CI->getArgOperand(1), Align::None(),
- CI->getArgOperand(0), Align::None(),
- CI->getArgOperand(2));
+ return B.CreateMemMove(CI->getArgOperand(1), Align(1), CI->getArgOperand(0),
+ Align(1), CI->getArgOperand(2));
}
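bcopy takes (src, dst, n) while memmove takes (dst, src, n), which is why the fold above swaps argument operands 0 and 1. A C++ model (name illustrative):

    #include <cstring>
    void bcopy_model(const void *src, void *dst, std::size_t n) {
      std::memmove(dst, src, n);
    }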
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
@@ -2863,7 +2794,7 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
}
Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
LibFunc Func;
Function *Callee = CI->getCalledFunction();
// Check for string/memory library functions.
@@ -2944,7 +2875,7 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
LibFunc Func,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
// Don't optimize calls that require strict floating point semantics.
if (CI->isStrictFP())
return nullptr;
@@ -3000,6 +2931,8 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
return replaceUnaryCall(CI, Builder, Intrinsic::floor);
case LibFunc_round:
return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_roundeven:
+ return replaceUnaryCall(CI, Builder, Intrinsic::roundeven);
case LibFunc_nearbyint:
return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
case LibFunc_rint:
@@ -3044,7 +2977,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
}
}
-Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
// TODO: Split out the code below that operates on FP calls so that
// we can allow all non-FP calls with the StrictFP attribute to be
// optimized.
@@ -3053,11 +2986,13 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc Func;
Function *Callee = CI->getCalledFunction();
+ bool isCallingConvC = isCallingConvCCompatible(CI);
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
- IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = isCallingConvCCompatible(CI);
+
+ IRBuilderBase::OperandBundlesGuard Guard(Builder);
+ Builder.setDefaultOperandBundles(OpBundles);
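The guard pattern that replaces the old function-local IRBuilder<>, in isolation (a sketch): the caller's builder is reused, and any state the simplifier changes unwinds automatically at end of scope.

    {
      IRBuilderBase::OperandBundlesGuard Guard(Builder); // snapshot defaults
      Builder.setDefaultOperandBundles(OpBundles);
      // calls created through Builder here carry CI's operand bundles
    } // previous default operand bundles restored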
// Command-line parameter overrides instruction attribute.
// This can't be moved to optimizeFloatingPointLibCall() because it may be
@@ -3097,14 +3032,20 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
}
// Also try to simplify calls to fortified library functions.
- if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
+ if (Value *SimplifiedFortifiedCI =
+ FortifiedSimplifier.optimizeCall(CI, Builder)) {
// Try to further simplify the result.
CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
- // Use an IR Builder from SimplifiedCI if available instead of CI
- // to guarantee we reach all uses we might replace later on.
- IRBuilder<> TmpBuilder(SimplifiedCI);
- if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
+ // Ensure that SimplifiedCI's uses are complete, since some calls have
+ // their uses analyzed.
+ replaceAllUsesWith(CI, SimplifiedCI);
+
+ // Set insertion point to SimplifiedCI to guarantee we reach all uses
+ // we might replace later on.
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(SimplifiedCI);
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
// If we were able to further simplify, remove the now redundant call.
substituteInParent(SimplifiedCI, V);
return V;
@@ -3158,16 +3099,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeFPrintF(CI, Builder);
case LibFunc_fwrite:
return optimizeFWrite(CI, Builder);
- case LibFunc_fread:
- return optimizeFRead(CI, Builder);
case LibFunc_fputs:
return optimizeFPuts(CI, Builder);
- case LibFunc_fgets:
- return optimizeFGets(CI, Builder);
- case LibFunc_fputc:
- return optimizeFPutc(CI, Builder);
- case LibFunc_fgetc:
- return optimizeFGetc(CI, Builder);
case LibFunc_puts:
return optimizePuts(CI, Builder);
case LibFunc_perror:
@@ -3280,11 +3213,11 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
- CI->getArgOperand(1), Align::None(),
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
+ Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -3292,11 +3225,11 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
- CI->getArgOperand(1), Align::None(),
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
+ Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -3304,13 +3237,13 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
// TODO: Try foldMallocMemset() here.
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
- CI->getArgOperand(2), Align::None());
+ CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
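Source-level shape of the fortified folds above, once isFortifiedCallFoldable proves the object-size argument covers the access (hypothetical model, not the builtin itself):

    #include <cstring>
    // __memcpy_chk(dst, src, n, objsize) with objsize known to be >= n
    // (or -1) degenerates to plain memcpy; the call's result is dst.
    void *memcpy_chk_model(void *dst, const void *src, std::size_t n) {
      return std::memcpy(dst, src, n);
    }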
@@ -3318,7 +3251,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
- IRBuilder<> &B,
+ IRBuilderBase &B,
LibFunc Func) {
const DataLayout &DL = CI->getModule()->getDataLayout();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
@@ -3362,8 +3295,16 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
return Ret;
}
+Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 1, None, 0))
+ return emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(),
+ TLI);
+ return nullptr;
+}
+
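The new fold handles the fortified strlen variant emitted by some C libraries (e.g. Bionic's __strlen_chk(s, objsize)); once the size check is provably satisfied, only the plain strlen call remains (model name illustrative):

    #include <cstring>
    std::size_t strlen_chk_model(const char *s) { return std::strlen(s); }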
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
- IRBuilder<> &B,
+ IRBuilderBase &B,
LibFunc Func) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
if (Func == LibFunc_strncpy_chk)
@@ -3378,7 +3319,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 4, 3))
return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
@@ -3387,7 +3328,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -3398,7 +3339,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
@@ -3409,7 +3350,7 @@ Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2))
return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
@@ -3417,7 +3358,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TLI);
@@ -3426,7 +3367,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TLI);
@@ -3435,7 +3376,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TLI);
@@ -3444,7 +3385,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
@@ -3453,7 +3394,7 @@ Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
+ IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1))
return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
CI->getArgOperand(4), B, TLI);
@@ -3461,7 +3402,8 @@ Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
return nullptr;
}
-Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
+Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
+ IRBuilderBase &Builder) {
// FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
// Some clang users checked for _chk libcall availability using:
// __has_builtin(__builtin___memcpy_chk)
@@ -3477,11 +3419,13 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc Func;
Function *Callee = CI->getCalledFunction();
+ bool isCallingConvC = isCallingConvCCompatible(CI);
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
- IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = isCallingConvCCompatible(CI);
+
+ IRBuilderBase::OperandBundlesGuard Guard(Builder);
+ Builder.setDefaultOperandBundles(OpBundles);
// First, check that this is a known library function and that the prototype
// is correct.
@@ -3502,6 +3446,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc_stpcpy_chk:
case LibFunc_strcpy_chk:
return optimizeStrpCpyChk(CI, Builder, Func);
+ case LibFunc_strlen_chk:
+ return optimizeStrLenChk(CI, Builder);
case LibFunc_stpncpy_chk:
case LibFunc_strncpy_chk:
return optimizeStrpNCpyChk(CI, Builder, Func);
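With both optimizeCall overloads now taking an IRBuilderBase &, callers own the builder. A sketch of the expected call site (construction of the Simplifier object elided; any IRBuilder instantiation can be passed by base reference):

    IRBuilder<> TheBuilder(CI);
    if (Value *With = Simplifier.optimizeCall(CI, TheBuilder)) {
      CI->replaceAllUsesWith(With);
      CI->eraseFromParent();
    }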
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index d2a400027d4b7..e257c5a015f51 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -24,10 +24,25 @@ cl::opt<bool> PGSOLargeWorkingSetSizeOnly(
"if the working set size is large (except for cold code.)"));
cl::opt<bool> PGSOColdCodeOnly(
- "pgso-cold-code-only", cl::Hidden, cl::init(true),
+ "pgso-cold-code-only", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to cold code."));
+cl::opt<bool> PGSOColdCodeOnlyForInstrPGO(
+ "pgso-cold-code-only-for-instr-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under instrumentation PGO."));
+
+cl::opt<bool> PGSOColdCodeOnlyForSamplePGO(
+ "pgso-cold-code-only-for-sample-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under sample PGO."));
+
+cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO(
+ "pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under partial-profile sample PGO."));
+
cl::opt<bool> PGSOIRPassOrTestOnly(
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only"
@@ -38,12 +53,12 @@ cl::opt<bool> ForcePGSO(
cl::desc("Force the (profiled-guided) size optimizations. "));
cl::opt<int> PgsoCutoffInstrProf(
- "pgso-cutoff-instr-prof", cl::Hidden, cl::init(250000), cl::ZeroOrMore,
+ "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::ZeroOrMore,
cl::desc("The profile guided size optimization profile summary cutoff "
"for instrumentation profile."));
cl::opt<int> PgsoCutoffSampleProf(
- "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore,
+ "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
cl::desc("The profile guided size optimization profile summary cutoff "
"for sample profile."));
@@ -60,6 +75,12 @@ struct BasicBlockBFIAdapter {
BlockFrequencyInfo &BFI) {
return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI);
}
+ static bool isFunctionColdInCallGraphNthPercentile(int CutOff,
+ const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionColdInCallGraphNthPercentile(CutOff, F, BFI);
+ }
static bool isColdBlock(const BasicBlock *BB,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
@@ -71,6 +92,11 @@ struct BasicBlockBFIAdapter {
BlockFrequencyInfo *BFI) {
return PSI->isHotBlockNthPercentile(CutOff, BB, BFI);
}
+ static bool isColdBlockNthPercentile(int CutOff, const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isColdBlockNthPercentile(CutOff, BB, BFI);
+ }
};
} // end anonymous namespace
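A sketch of the query shape the adapter above forwards (helper name invented): cutoffs are parts-per-million of the profile summary, so the raised defaults (950000 for instrumentation, 990000 for sample profiles) treat a block as hot, and therefore exempt from size optimizations, only if it sits within the hottest 95% or 99% of execution counts.

    static bool isHotForPGSO(const BasicBlock *BB, ProfileSummaryInfo *PSI,
                             BlockFrequencyInfo *BFI) {
      return PSI->isHotBlockNthPercentile(PgsoCutoffInstrProf, BB, BFI);
    }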
@@ -84,6 +110,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
+ assert(BB);
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
QueryType);
}
diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 7880ea1c6c479..b559811d120bc 100644
--- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -48,7 +48,7 @@ bool StripGCRelocates::runOnFunction(Function &F) {
// i.e. not bound to a single statepoint token.
for (Instruction &I : instructions(F)) {
if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
- if (isStatepoint(GCR->getOperand(0)))
+ if (isa<GCStatepointInst>(GCR->getOperand(0)))
GCRelocates.push_back(GCR);
}
// All gc.relocates are bound to a single statepoint token. The order of
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index aacf81d835193..ec4ea848a5d4a 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -117,8 +117,9 @@ public:
const std::string Target;
ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
- : RewriteDescriptor(DT), Source(Naked ? StringRef("\01" + S.str()) : S),
- Target(T) {}
+ : RewriteDescriptor(DT),
+ Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
+ Target(std::string(T)) {}
bool performOnModule(Module &M) override;
@@ -159,7 +160,8 @@ public:
const std::string Transform;
PatternRewriteDescriptor(StringRef P, StringRef T)
- : RewriteDescriptor(DT), Pattern(P), Transform(T) { }
+ : RewriteDescriptor(DT), Pattern(std::string(P)),
+ Transform(std::string(T)) {}
bool performOnModule(Module &M) override;
@@ -189,7 +191,7 @@ performOnModule(Module &M) {
continue;
if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
- rewriteComdat(M, GO, C.getName(), Name);
+ rewriteComdat(M, GO, std::string(C.getName()), Name);
if (Value *V = (M.*Get)(Name))
C.setValueName(V->getValueName());
@@ -352,19 +354,19 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
if (KeyValue.equals("source")) {
std::string Error;
- Source = Value->getValue(ValueStorage);
+ Source = std::string(Value->getValue(ValueStorage));
if (!Regex(Source).isValid(Error)) {
YS.printError(Field.getKey(), "invalid regex: " + Error);
return false;
}
} else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
+ Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
+ Transform = std::string(Value->getValue(ValueStorage));
} else if (KeyValue.equals("naked")) {
std::string Undecorated;
- Undecorated = Value->getValue(ValueStorage);
+ Undecorated = std::string(Value->getValue(ValueStorage));
Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
} else {
YS.printError(Field.getKey(), "unknown key for function");
@@ -421,15 +423,15 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
if (KeyValue.equals("source")) {
std::string Error;
- Source = Value->getValue(ValueStorage);
+ Source = std::string(Value->getValue(ValueStorage));
if (!Regex(Source).isValid(Error)) {
YS.printError(Field.getKey(), "invalid regex: " + Error);
return false;
}
} else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
+ Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
+ Transform = std::string(Value->getValue(ValueStorage));
} else {
YS.printError(Field.getKey(), "unknown Key for Global Variable");
return false;
@@ -484,15 +486,15 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
if (KeyValue.equals("source")) {
std::string Error;
- Source = Value->getValue(ValueStorage);
+ Source = std::string(Value->getValue(ValueStorage));
if (!Regex(Source).isValid(Error)) {
YS.printError(Field.getKey(), "invalid regex: " + Error);
return false;
}
} else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
+ Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
+ Transform = std::string(Value->getValue(ValueStorage));
} else {
YS.printError(Field.getKey(), "unknown key for Global Alias");
return false;
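Background for the std::string(...) casts throughout this file: StringRef's implicit conversion to std::string was removed upstream, so initializations from a StringRef must now spell the conversion (getSomeName is illustrative):

    StringRef SR = getSomeName();
    std::string S1 = std::string(SR); // explicit conversion, as in the diff
    std::string S2 = SR.str();        // equivalent spelling
    // std::string S3 = SR;           // no longer compiles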
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
new file mode 100644
index 0000000000000..b10deee3907c7
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -0,0 +1,220 @@
+//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// For each natural loop with multiple exit blocks, this pass creates a new
+// block N such that all exiting blocks now branch to N, and then control flow
+// is redistributed to all the original exit blocks.
+//
+// Limitation: This assumes that all terminators in the CFG are direct branches
+// (the "br" instruction). The presence of any other control flow
+// such as indirectbr, switch or callbr will cause an assert.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "unify-loop-exits"
+
+using namespace llvm;
+
+namespace {
+struct UnifyLoopExits : public FunctionPass {
+ static char ID;
+ UnifyLoopExits() : FunctionPass(ID) {
+ initializeUnifyLoopExitsPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LowerSwitchID);
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char UnifyLoopExits::ID = 0;
+
+FunctionPass *llvm::createUnifyLoopExitsPass() { return new UnifyLoopExits(); }
+
+INITIALIZE_PASS_BEGIN(UnifyLoopExits, "unify-loop-exits",
+ "Fixup each natural loop to have a single exit block",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(UnifyLoopExits, "unify-loop-exits",
+ "Fixup each natural loop to have a single exit block",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+
+// The current transform introduces new control flow paths which may break the
+// SSA requirement that every def must dominate all its uses. For example,
+// consider a value D defined inside the loop that is used by some instruction
+// U outside the loop. It follows that D dominates U, since the original
+// program has valid SSA form. After merging the exits, all paths from D to U
+// now flow through the unified exit block. In addition, there may be other
+// paths that do not pass through D, but now reach the unified exit
+// block. Thus, D no longer dominates U.
+//
+// Restore the dominance by creating a phi for each such D at the new unified
+// loop exit. But when doing this, ignore any uses U that are in the new unified
+// loop exit, since those were introduced specially when the block was created.
+//
+// The use of SSAUpdater seems like overkill for this operation. The location
+// for creating the new PHI is well-known, and also the set of incoming blocks
+// to the new PHI.
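A worked instance of the comment above (CFG invented for illustration): take exiting blocks E1 and E2, a value %d defined in E1, and a use of %d in an exit block X previously reachable only from E1. After the hub is built, X is reached through the unified exit, which E2 also reaches, so %d no longer dominates its use. restoreSSA() then materializes in the unified exit

    // %d.moved = phi ty [ %d, %E1 ], [ undef, %E2 ]

and rewrites the use in X to %d.moved.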
+static void restoreSSA(const DominatorTree &DT, const Loop *L,
+ const SetVector<BasicBlock *> &Incoming,
+ BasicBlock *LoopExitBlock) {
+ using InstVector = SmallVector<Instruction *, 8>;
+ using IIMap = DenseMap<Instruction *, InstVector>;
+ IIMap ExternalUsers;
+ for (auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ for (auto &U : I.uses()) {
+ auto UserInst = cast<Instruction>(U.getUser());
+ auto UserBlock = UserInst->getParent();
+ if (UserBlock == LoopExitBlock)
+ continue;
+ if (L->contains(UserBlock))
+ continue;
+ LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("
+ << BB->getName() << ")"
+ << ": " << UserInst->getName() << "("
+ << UserBlock->getName() << ")"
+ << "\n");
+ ExternalUsers[&I].push_back(UserInst);
+ }
+ }
+ }
+
+ for (auto II : ExternalUsers) {
+ // For each Def used outside the loop, create NewPhi in
+ // LoopExitBlock. NewPhi receives Def only along exiting blocks that Def
+ // dominates, while the remaining incoming values are undef since those
+ // paths did not exist in the original CFG.
+ auto Def = II.first;
+ LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");
+ auto NewPhi = PHINode::Create(Def->getType(), Incoming.size(),
+ Def->getName() + ".moved",
+ LoopExitBlock->getTerminator());
+ for (auto In : Incoming) {
+ LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");
+ if (Def->getParent() == In || DT.dominates(Def, In)) {
+ LLVM_DEBUG(dbgs() << "dominated\n");
+ NewPhi->addIncoming(Def, In);
+ } else {
+ LLVM_DEBUG(dbgs() << "not dominated\n");
+ NewPhi->addIncoming(UndefValue::get(Def->getType()), In);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "external users:");
+ for (auto U : II.second) {
+ LLVM_DEBUG(dbgs() << " " << U->getName());
+ U->replaceUsesOfWith(Def, NewPhi);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+}
+
+static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
+ // To unify the loop exits, we need a list of the exiting blocks as
+ // well as exit blocks. The functions for locating these lists both
+ // traverse the entire loop body. It is more efficient to first
+ // locate the exiting blocks and then examine their successors to
+ // locate the exit blocks.
+ SetVector<BasicBlock *> ExitingBlocks;
+ SetVector<BasicBlock *> Exits;
+
+ // We need SetVectors, but the Loop API takes a vector, so we use a temporary.
+ SmallVector<BasicBlock *, 8> Temp;
+ L->getExitingBlocks(Temp);
+ for (auto BB : Temp) {
+ ExitingBlocks.insert(BB);
+ for (auto S : successors(BB)) {
+ auto SL = LI.getLoopFor(S);
+ // A successor is not an exit if it is directly or indirectly in the
+ // current loop.
+ if (SL == L || L->contains(SL))
+ continue;
+ Exits.insert(S);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Found exit blocks:";
+ for (auto Exit : Exits) {
+ dbgs() << " " << Exit->getName();
+ }
+ dbgs() << "\n";
+
+ dbgs() << "Found exiting blocks:";
+ for (auto EB : ExitingBlocks) {
+ dbgs() << " " << EB->getName();
+ }
+ dbgs() << "\n";);
+
+ if (Exits.size() <= 1) {
+ LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n");
+ return false;
+ }
+
+ SmallVector<BasicBlock *, 8> GuardBlocks;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks,
+ Exits, "loop.exit");
+
+ restoreSSA(DT, L, ExitingBlocks, LoopExitBlock);
+
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif // EXPENSIVE_CHECKS
+ L->verifyLoop();
+
+ // The guard blocks were created outside the loop, so they need to become
+ // members of the parent loop.
+ if (auto ParentLoop = L->getParentLoop()) {
+ for (auto G : GuardBlocks) {
+ ParentLoop->addBasicBlockToLoop(G, LI);
+ }
+ ParentLoop->verifyLoop();
+ }
+
+#if defined(EXPENSIVE_CHECKS)
+ LI.verify(DT);
+#endif // EXPENSIVE_CHECKS
+
+ return true;
+}
+
+bool UnifyLoopExits::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
+ << "\n");
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ bool Changed = false;
+ auto Loops = LI.getLoopsInPreorder();
+ for (auto L : Loops) {
+ LLVM_DEBUG(dbgs() << "Loop: " << L->getHeader()->getName() << " (depth: "
+ << LI.getLoopDepth(L->getHeader()) << ")\n");
+ Changed |= unifyLoopExits(DT, LI, L);
+ }
+ return Changed;
+}
diff --git a/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
new file mode 100644
index 0000000000000..5b58548e54dc1
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
@@ -0,0 +1,97 @@
+//===- UniqueInternalLinkageNames.cpp - Unique Internal Linkage Sym Names -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unique naming of internal linkage symbols with option
+// -funique-internal-linkage-symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+static bool uniqueifyInternalLinkageNames(Module &M) {
+ llvm::MD5 Md5;
+ Md5.update(M.getSourceFileName());
+ llvm::MD5::MD5Result R;
+ Md5.final(R);
+ SmallString<32> Str;
+ llvm::MD5::stringifyResult(R, Str);
+ std::string ModuleNameHash = (Twine(".") + Twine(Str)).str();
+ bool Changed = false;
+
+ // Append the module hash to all internal linkage functions.
+ for (auto &F : M) {
+ if (F.hasInternalLinkage()) {
+ F.setName(F.getName() + ModuleNameHash);
+ Changed = true;
+ }
+ }
+
+ // Append the module hash to all internal linkage globals.
+ for (auto &GV : M.globals()) {
+ if (GV.hasInternalLinkage()) {
+ GV.setName(GV.getName() + ModuleNameHash);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
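Effect sketch (hash digits invented): for a module whose source file name is "foo.c", both internal symbols below get "." plus the MD5 of "foo.c" appended, so same-named internal symbols from different TUs stay distinct in profiles and symbolized output.

    static int counter;      // becomes counter.5c3d9f2e...
    static void helper() {}  // becomes helper.5c3d9f2e...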
+
+namespace {
+
+// Legacy pass that appends the module hash to every internal linkage name.
+class UniqueInternalLinkageNamesLegacyPass : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ StringRef getPassName() const override {
+ return "Unique Internal Linkage Names";
+ }
+
+ explicit UniqueInternalLinkageNamesLegacyPass() : ModulePass(ID) {
+ initializeUniqueInternalLinkageNamesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ return uniqueifyInternalLinkageNames(M);
+ }
+};
+
+char UniqueInternalLinkageNamesLegacyPass::ID = 0;
+} // anonymous namespace
+
+PreservedAnalyses
+UniqueInternalLinkageNamesPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!uniqueifyInternalLinkageNames(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(UniqueInternalLinkageNamesLegacyPass,
+ "unique-internal-linkage-names",
+ "Uniqueify internal linkage names", false, false)
+INITIALIZE_PASS_END(UniqueInternalLinkageNamesLegacyPass,
+ "unique-internal-linkage-names",
+ "Uniqueify Internal linkage names", false, false)
+
+namespace llvm {
+ModulePass *createUniqueInternalLinkageNamesPass() {
+ return new UniqueInternalLinkageNamesLegacyPass();
+}
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 7769c7493cdab..ce98a739bea88 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -24,8 +24,11 @@ using namespace llvm;
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeAddDiscriminatorsLegacyPassPass(Registry);
+ initializeAssumeSimplifyPassLegacyPassPass(Registry);
+ initializeAssumeBuilderPassLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeCanonicalizeAliasesLegacyPassPass(Registry);
+ initializeCanonicalizeFreezeInLoopsPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
@@ -40,6 +43,9 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeStripGCRelocatesPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
initializeInjectTLIMappingsLegacyPass(Registry);
+ initializeFixIrreduciblePass(Registry);
+ initializeUnifyLoopExitsPass(Registry);
+ initializeUniqueInternalLinkageNamesLegacyPassPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 591e1fd2dbee1..6ff08cd287124 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -1,16 +1,18 @@
#include "llvm/Transforms/Utils/VNCoercion.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
+
namespace llvm {
namespace VNCoercion {
+static bool isFirstClassAggregateOrScalableType(Type *Ty) {
+ return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
+}
+
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
@@ -18,20 +20,20 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (StoredTy == LoadTy)
return true;
- // If the loaded or stored value is an first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
- StoredTy->isArrayTy())
+ // If the loaded/stored value is a first class array/struct, or scalable type,
+ // don't try to transform them. We need to be able to bitcast to integer.
+ if (isFirstClassAggregateOrScalableType(LoadTy) ||
+ isFirstClassAggregateOrScalableType(StoredTy))
return false;
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
return false;
// The store has to be at least as big as the load.
- if (StoreSize < DL.getTypeSizeInBits(LoadTy))
+ if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
// Don't coerce non-integral pointers to integers or vice versa.
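An example of the coercion this predicate gates (IR shown in C++ comments; values invented):

    // store i64 %v, i64* %p
    // %x = load i32, i32* %q   ; must-aliases %p
    // can be served by the store as
    // %x = trunc i64 %v to i32 ; big-endian targets first lshr by 32
    // Aggregates and scalable vectors are rejected above because they
    // cannot be bitcast to a fixed-width integer for this rewrite.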
@@ -55,14 +57,13 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
"precondition violation - materialization can't fail");
if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
+ StoredVal = ConstantFoldConstant(C, DL);
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
- uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
- uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
// If the store and reload are the same size, we can always reuse it.
if (StoredValSize == LoadedValSize) {
@@ -89,8 +90,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
}
if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
+ StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
@@ -115,8 +115,8 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (DL.isBigEndian()) {
- uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
- DL.getTypeStoreSizeInBits(LoadedTy);
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() -
+ DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize();
StoredVal = Helper.CreateLShr(
StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
}
@@ -135,8 +135,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
}
if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
+ StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
@@ -148,7 +147,8 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilder<> &IRB, const DataLayout &DL) {
+ IRBuilderBase &IRB,
+ const DataLayout &DL) {
return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
}
@@ -164,9 +164,9 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const DataLayout &DL) {
- // If the loaded or stored value is a first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ // If the loaded/stored value is a first class array/struct, or scalable type,
+ // don't try to transform them. We need to be able to bitcast to integer.
+ if (isFirstClassAggregateOrScalableType(LoadTy))
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
@@ -184,7 +184,7 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
@@ -218,10 +218,9 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI, const DataLayout &DL) {
auto *StoredVal = DepSI->getValueOperand();
-
- // Cannot handle reading from store of first-class aggregate yet.
- if (StoredVal->getType()->isStructTy() ||
- StoredVal->getType()->isArrayTy())
+
+ // Cannot handle reading from store of first-class aggregate or scalable type.
+ if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
// Don't coerce non-integral pointers to integers or vice versa.
@@ -235,11 +234,96 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
- DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
DL);
}
+/// Looks at a memory location for a load (specified by MemLocBase,
+/// MemLocOffs, and MemLocSize) and compares it against a load.
+///
+/// If the specified load could be safely widened to a larger integer load
+/// that is 1) still efficient, 2) safe for the target, and 3) would provide
+/// the specified memory location value, then this function returns the size
+/// in bytes of the load width to use. If not, this returns zero.
+static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
+ int64_t MemLocOffs,
+ unsigned MemLocSize,
+ const LoadInst *LI) {
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
+ return 0;
+
+ // Load widening is hostile to ThreadSanitizer: it may cause false positives
+ // or make the reports more cryptic (access sizes are wrong).
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+ return 0;
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase)
+ return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs)
+ return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs + MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs + LoadAlign < MemLocEnd)
+ return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (true) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !DL.fitsInLegalInteger(NewLoadByteSize * 8))
+ return 0;
+
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ (LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress) ||
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeHWAddress)))
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs + NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+}
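A reduced, runnable model of the widening loop above (the alignment doubles as the legality limit here for brevity; helper name invented):

    #include <cstdint>
    unsigned widenTo(unsigned loadBytes, int64_t liOffs, int64_t memLocEnd,
                     unsigned loadAlign) {
      for (uint64_t Sz = uint64_t(loadBytes) * 2; Sz <= loadAlign; Sz *= 2)
        if (liOffs + int64_t(Sz) >= memLocEnd)
          return unsigned(Sz);
      return 0;
    }
    // widenTo(1, 0, 4, 4) == 4: a 4-byte-aligned i8 load at offset 0 can
    // be widened to an i32 that also covers the byte at offset 3.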
+
/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
@@ -255,7 +339,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
return -1;
Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
if (R != -1)
return R;
@@ -265,10 +349,10 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- LoadBase, LoadOffs, LoadSize, DepLI);
+ unsigned Size =
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0)
return -1;
@@ -319,21 +403,17 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
- // Don't coerce non-integral pointers to integers or vice versa, and the
- // memtransfer is implicitly a raw byte code
- if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
- // TODO: Can allow nullptrs from constant zeros
- return -1;
-
unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- Src =
- ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
+ if (Offset) {
+ Src = ConstantExpr::getBitCast(Src,
+ Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
+ Src, OffsetCst);
+ }
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
return Offset;
@@ -355,8 +435,9 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
return SrcVal;
}
- uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+ uint64_t StoreSize =
+ (DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPtrOrPtrVectorTy())
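
With both sizes rounded up to whole bytes, the remainder of this helper shifts and truncates the stored integer so that only the bits the load reads survive. For the little-endian case that reduces to (standalone sketch, hypothetical helper):

    #include <cstdint>

    // Little-endian extraction of a narrower load from a wider stored
    // integer: shift the byte offset away, then truncate to the load width.
    uint32_t extractLoadBitsLE(uint64_t StoreVal, unsigned OffsetBytes) {
      return static_cast<uint32_t>(StoreVal >> (OffsetBytes * 8));
    }
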
@@ -408,8 +489,9 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ unsigned SrcValStoreSize =
+ DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
@@ -431,7 +513,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment()));
+ NewLoad->setAlignment(SrcVal->getAlign());
LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
@@ -452,8 +534,9 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ unsigned SrcValStoreSize =
+ DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize)
return nullptr;
return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
@@ -464,7 +547,7 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, HelperClass &Helper,
const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
@@ -500,16 +583,18 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- Src =
- ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
+ if (Offset) {
+ Src = ConstantExpr::getBitCast(Src,
+ Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
+ Src, OffsetCst);
+ }
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
}
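
For the memset case handled earlier in this helper (the "Otherwise" above refers past it), a load fully inside the intrinsic's range simply sees the fill byte replicated across the load width. Standalone sketch of that replication (hypothetical helper; assumes LoadBytes <= 8):

    #include <cstdint>

    // memset(dst, Val, len) makes every byte in range equal to Val, so a
    // LoadBytes-wide load from inside the range reads Val repeated.
    uint64_t memsetLoadValue(uint8_t Val, unsigned LoadBytes) {
      uint64_t V = 0;
      for (unsigned i = 0; i != LoadBytes; ++i)
        V = (V << 8) | Val;
      return V;
    }
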
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index da68d3713b404..f1b3fe8e2fa9a 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,7 +21,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -369,7 +368,8 @@ Value *Mapper::mapValue(const Value *V) {
if (NewTy != IA->getFunctionType())
V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
- IA->hasSideEffects(), IA->isAlignStack());
+ IA->hasSideEffects(), IA->isAlignStack(),
+ IA->getDialect());
}
return getVM()[V] = const_cast<Value *>(V);
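
When the mapper rebuilds an InlineAsm value for a remapped function type, the old call omitted the final InlineAsm::get parameter and so silently reset Intel-dialect asm back to the AT&T default; the fix forwards getDialect(). As a standalone helper (LLVM-11-era API):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/InlineAsm.h"

    // Recreate inline asm with a new type without losing its dialect.
    llvm::InlineAsm *retypeAsm(llvm::InlineAsm *IA,
                               llvm::FunctionType *NewTy) {
      return llvm::InlineAsm::get(NewTy, IA->getAsmString(),
                                  IA->getConstraintString(),
                                  IA->hasSideEffects(), IA->isAlignStack(),
                                  IA->getDialect());
    }
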
@@ -888,17 +888,17 @@ void Mapper::remapInstruction(Instruction *I) {
return;
// If the instruction's type is being remapped, do so now.
- if (auto CS = CallSite(I)) {
+ if (auto *CB = dyn_cast<CallBase>(I)) {
SmallVector<Type *, 3> Tys;
- FunctionType *FTy = CS.getFunctionType();
+ FunctionType *FTy = CB->getFunctionType();
Tys.reserve(FTy->getNumParams());
for (Type *Ty : FTy->params())
Tys.push_back(TypeMapper->remapType(Ty));
- CS.mutateFunctionType(FunctionType::get(
+ CB->mutateFunctionType(FunctionType::get(
TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
- LLVMContext &C = CS->getContext();
- AttributeList Attrs = CS.getAttributes();
+ LLVMContext &C = CB->getContext();
+ AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
if (Attrs.hasAttribute(i, Attribute::ByVal)) {
Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
@@ -910,7 +910,7 @@ void Mapper::remapInstruction(Instruction *I) {
C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
}
}
- CS.setAttributes(Attrs);
+ CB->setAttributes(Attrs);
return;
}
if (auto *AI = dyn_cast<AllocaInst>(I))
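
The hunks above complete this file's CallSite-to-CallBase migration: the CallSite wrapper was removed in this LLVM cycle, and CallBase, the common base class of CallInst, InvokeInst, and CallBrInst, serves the same role directly in the instruction hierarchy. The migration pattern as a standalone sketch (LLVM-11-era API):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Support/Casting.h"

    // dyn_cast<CallBase> replaces the old CallSite(I) construct-and-test.
    bool callsVarArgFunction(llvm::Instruction *I) {
      if (auto *CB = llvm::dyn_cast<llvm::CallBase>(I))
        return CB->getFunctionType()->isVarArg();
      return false;
    }
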