author    Dimitry Andric <dim@FreeBSD.org>  2022-01-27 22:17:16 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2022-06-04 11:59:19 +0000
commit    390adc38fc112be360bd15499e5241bf4e675b6f (patch)
tree      712d68d3aa03f7aa4902ba03dcac2a56f49ae0e5 /contrib/llvm-project/llvm/lib/Transforms/Utils
parent    8a84287b0edc66fc6dede3db770d10ff41da5464 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp        |  17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp           |  44
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp        |   3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp      |   4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp           | 244
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp               | 290
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp            |   8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp          |  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp                   |   4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp                   |  13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp                |   8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp              |  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp               |   5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp          |  21
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp      |   2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp             |  74
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp  | 273
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 222
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp             | 198
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp        |   7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp             |   8
21 files changed, 797 insertions(+), 678 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index fdc914a72bfd..c734611836eb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -22,19 +22,6 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-emit-printf"
-static bool isCString(const Value *Arg) {
- auto Ty = Arg->getType();
- auto PtrTy = dyn_cast<PointerType>(Ty);
- if (!PtrTy)
- return false;
-
- auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
- if (!IntTy)
- return false;
-
- return IntTy->getBitWidth() == 8;
-}
-
static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
auto Int64Ty = Builder.getInt64Ty();
auto Ty = Arg->getType();
@@ -176,13 +163,15 @@ static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
bool IsLast) {
+ Arg = Builder.CreateBitCast(
+ Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
auto Length = getStrlenWithNull(Builder, Arg);
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
bool SpecIsCString, bool IsLast) {
- if (SpecIsCString && isCString(Arg)) {
+ if (SpecIsCString && isa<PointerType>(Arg->getType())) {
return appendString(Builder, Desc, Arg, IsLast);
}
// If the format specifies a string but the argument is not, the frontend will
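The deleted isCString helper relied on inspecting the pointer's element type, which the opaque-pointer transition removes; afterwards only the address space of a pointer is still visible, so processArg accepts any pointer for a %s spec and appendString bitcasts it to i8*. A minimal sketch of the new check, assuming an in-scope IRBuilder (illustrative helper, not the upstream code):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Hypothetical helper: with opaque pointers, any pointer can back a %s
    // argument; only its address space is still recoverable from the type.
    static Value *castStringArg(IRBuilder<> &Builder, Value *Arg) {
      auto *PtrTy = dyn_cast<PointerType>(Arg->getType());
      if (!PtrTy)
        return nullptr; // non-pointer arguments take the non-string path
      return Builder.CreateBitCast(
          Arg, Builder.getInt8PtrTy(PtrTy->getAddressSpace()));
    }
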
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 580cfd80141e..97f11ca71726 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,6 +34,7 @@ STATISTIC(NumReadNone, "Number of functions inferred as readnone");
STATISTIC(NumInaccessibleMemOnly,
"Number of functions inferred as inaccessiblememonly");
STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumWriteOnly, "Number of functions inferred as writeonly");
STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
STATISTIC(NumInaccessibleMemOrArgMemOnly,
"Number of functions inferred as inaccessiblemem_or_argmemonly");
@@ -71,6 +72,19 @@ static bool setOnlyReadsMemory(Function &F) {
return true;
}
+static bool setOnlyWritesMemory(Function &F) {
+ if (F.onlyWritesMemory()) // writeonly or readnone
+ return false;
+ // Turn readonly and writeonly into readnone.
+ if (F.hasFnAttribute(Attribute::ReadOnly)) {
+ F.removeFnAttr(Attribute::ReadOnly);
+ return setDoesNotAccessMemory(F);
+ }
+ ++NumWriteOnly;
+ F.setOnlyWritesMemory();
+ return true;
+}
+
static bool setOnlyAccessesArgMemory(Function &F) {
if (F.onlyAccessesArgMemory())
return false;
@@ -233,6 +247,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
switch (TheLibFunc) {
case LibFunc_strlen:
+ case LibFunc_strnlen:
case LibFunc_wcslen:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
@@ -400,6 +415,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
+ case LibFunc_aligned_alloc:
+ case LibFunc_valloc:
case LibFunc_malloc:
case LibFunc_vec_malloc:
Changed |= setOnlyAccessesInaccessibleMemory(F);
@@ -484,6 +501,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_realloc:
case LibFunc_vec_realloc:
+ case LibFunc_reallocf:
Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -492,11 +510,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
Changed |= setArgNoUndef(F, 1);
return Changed;
- case LibFunc_reallocf:
- Changed |= setRetNoUndef(F);
- Changed |= setWillReturn(F);
- Changed |= setArgNoUndef(F, 1);
- return Changed;
case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
@@ -536,13 +549,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc_aligned_alloc:
- Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetAndArgsNoUndef(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setWillReturn(F);
- return Changed;
case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
@@ -569,6 +575,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_calloc:
case LibFunc_vec_calloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
@@ -851,13 +858,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc_valloc:
- Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetAndArgsNoUndef(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setWillReturn(F);
- return Changed;
case LibFunc_vprintf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -1020,12 +1020,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_memset_pattern4:
case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
- Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ LLVM_FALLTHROUGH;
case LibFunc_memset:
Changed |= setWillReturn(F);
LLVM_FALLTHROUGH;
@@ -1158,7 +1156,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
- case LibFunc_strnlen:
case LibFunc_tan:
case LibFunc_tanf:
case LibFunc_tanh:
@@ -1171,6 +1168,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_truncl:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotFreeMemory(F);
+ Changed |= setOnlyWritesMemory(F);
Changed |= setWillReturn(F);
return Changed;
default:
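The new setOnlyWritesMemory helper (first hunk above) encodes a small attribute lattice: a function that reads no memory and is additionally proven to write no memory accesses none at all, so readonly plus writeonly collapses to readnone. A simplified restatement as a standalone sketch (not the upstream code):

    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Simplified restatement of the helper's lattice rule.
    static void addWriteOnly(Function &F) {
      if (F.onlyWritesMemory()) // already writeonly or readnone
        return;
      if (F.hasFnAttribute(Attribute::ReadOnly)) {
        // readonly + writeonly == readnone
        F.removeFnAttr(Attribute::ReadOnly);
        F.setDoesNotAccessMemory();
        return;
      }
      F.setOnlyWritesMemory();
    }
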
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
index b2763900e154..ac3839f2a4ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -20,8 +20,7 @@ using namespace llvm;
bool CallGraphUpdater::finalize() {
if (!DeadFunctionsInComdats.empty()) {
- filterDeadComdatFunctions(*DeadFunctionsInComdats.front()->getParent(),
- DeadFunctionsInComdats);
+ filterDeadComdatFunctions(DeadFunctionsInComdats);
DeadFunctions.append(DeadFunctionsInComdats.begin(),
DeadFunctionsInComdats.end());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index ebe19f1751e5..56b6e4bc46a5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -500,7 +500,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
CB.setArgOperand(ArgNo, Cast);
// Remove any incompatible attributes for the argument.
- AttrBuilder ArgAttrs(CallerPAL.getParamAttrs(ArgNo));
+ AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo));
ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
// We may have a different byval/inalloca type.
@@ -518,7 +518,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
// Remove any incompatible return value attribute.
- AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
+ AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs());
if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
createRetBitCast(CB, CallSiteRetTy, RetBitCast);
RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
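Both hunks track the LLVM 14 AttrBuilder API change: the builder is now bound to an LLVMContext and is seeded from an AttributeSet (here the return-attribute set) rather than from an AttributeList plus an index. A minimal sketch of the new spelling, with a hypothetical helper name:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Hypothetical query built on the context-bound AttrBuilder.
    static bool retIsNoAlias(LLVMContext &Ctx, const AttributeList &AL) {
      // Pre-LLVM-14 spelling: AttrBuilder RAttrs(AL, AttributeList::ReturnIndex);
      AttrBuilder RAttrs(Ctx, AL.getRetAttrs());
      return RAttrs.contains(Attribute::NoAlias);
    }
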
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 96aff563aa9b..24cd5747c5a4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -829,39 +829,54 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
default: RetTy = Type::getInt16Ty(header->getContext()); break;
}
- std::vector<Type *> paramTy;
+ std::vector<Type *> ParamTy;
+ std::vector<Type *> AggParamTy;
+ ValueSet StructValues;
// Add the types of the input values to the function's argument list
for (Value *value : inputs) {
LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
- paramTy.push_back(value->getType());
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) {
+ AggParamTy.push_back(value->getType());
+ StructValues.insert(value);
+ } else
+ ParamTy.push_back(value->getType());
}
// Add the types of the output values to the function's argument list.
for (Value *output : outputs) {
LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
- if (AggregateArgs)
- paramTy.push_back(output->getType());
- else
- paramTy.push_back(PointerType::getUnqual(output->getType()));
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+ AggParamTy.push_back(output->getType());
+ StructValues.insert(output);
+ } else
+ ParamTy.push_back(PointerType::getUnqual(output->getType()));
+ }
+
+ assert(
+ (ParamTy.size() + AggParamTy.size()) ==
+ (inputs.size() + outputs.size()) &&
+ "Number of scalar and aggregate params does not match inputs, outputs");
+ assert(StructValues.empty() ||
+ AggregateArgs && "Expected StructValues only with AggregateArgs set");
+
+ // Concatenate scalar and aggregate params in ParamTy.
+ size_t NumScalarParams = ParamTy.size();
+ StructType *StructTy = nullptr;
+ if (AggregateArgs && !AggParamTy.empty()) {
+ StructTy = StructType::get(M->getContext(), AggParamTy);
+ ParamTy.push_back(PointerType::getUnqual(StructTy));
}
LLVM_DEBUG({
dbgs() << "Function type: " << *RetTy << " f(";
- for (Type *i : paramTy)
+ for (Type *i : ParamTy)
dbgs() << *i << ", ";
dbgs() << ")\n";
});
- StructType *StructTy = nullptr;
- if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- StructTy = StructType::get(M->getContext(), paramTy);
- paramTy.clear();
- paramTy.push_back(PointerType::getUnqual(StructTy));
- }
- FunctionType *funcType =
- FunctionType::get(RetTy, paramTy,
- AllowVarArgs && oldFunction->isVarArg());
+ FunctionType *funcType = FunctionType::get(
+ RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg());
std::string SuffixToUse =
Suffix.empty()
@@ -871,13 +886,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Function *newFunction = Function::Create(
funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
oldFunction->getName() + "." + SuffixToUse, M);
- // If the old function is no-throw, so is the new one.
- if (oldFunction->doesNotThrow())
- newFunction->setDoesNotThrow();
-
- // Inherit the uwtable attribute if we need to.
- if (oldFunction->hasUWTable())
- newFunction->setHasUWTable();
// Inherit all of the target dependent attributes and white-listed
// target independent attributes.
@@ -893,53 +901,26 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
} else
switch (Attr.getKindAsEnum()) {
// Those attributes cannot be propagated safely. Explicitly list them
- // here so we get a warning if new attributes are added. This list also
- // includes non-function attributes.
- case Attribute::Alignment:
+ // here so we get a warning if new attributes are added.
case Attribute::AllocSize:
case Attribute::ArgMemOnly:
case Attribute::Builtin:
- case Attribute::ByVal:
case Attribute::Convergent:
- case Attribute::Dereferenceable:
- case Attribute::DereferenceableOrNull:
- case Attribute::ElementType:
- case Attribute::InAlloca:
- case Attribute::InReg:
case Attribute::InaccessibleMemOnly:
case Attribute::InaccessibleMemOrArgMemOnly:
case Attribute::JumpTable:
case Attribute::Naked:
- case Attribute::Nest:
- case Attribute::NoAlias:
case Attribute::NoBuiltin:
- case Attribute::NoCapture:
case Attribute::NoMerge:
case Attribute::NoReturn:
case Attribute::NoSync:
- case Attribute::NoUndef:
- case Attribute::None:
- case Attribute::NonNull:
- case Attribute::Preallocated:
case Attribute::ReadNone:
case Attribute::ReadOnly:
- case Attribute::Returned:
case Attribute::ReturnsTwice:
- case Attribute::SExt:
case Attribute::Speculatable:
case Attribute::StackAlignment:
- case Attribute::StructRet:
- case Attribute::SwiftError:
- case Attribute::SwiftSelf:
- case Attribute::SwiftAsync:
case Attribute::WillReturn:
case Attribute::WriteOnly:
- case Attribute::ZExt:
- case Attribute::ImmArg:
- case Attribute::ByRef:
- case Attribute::EndAttrKinds:
- case Attribute::EmptyKey:
- case Attribute::TombstoneKey:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -980,30 +961,62 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::MustProgress:
case Attribute::NoProfile:
break;
+ // These attributes cannot be applied to functions.
+ case Attribute::Alignment:
+ case Attribute::ByVal:
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull:
+ case Attribute::ElementType:
+ case Attribute::InAlloca:
+ case Attribute::InReg:
+ case Attribute::Nest:
+ case Attribute::NoAlias:
+ case Attribute::NoCapture:
+ case Attribute::NoUndef:
+ case Attribute::NonNull:
+ case Attribute::Preallocated:
+ case Attribute::Returned:
+ case Attribute::SExt:
+ case Attribute::StructRet:
+ case Attribute::SwiftError:
+ case Attribute::SwiftSelf:
+ case Attribute::SwiftAsync:
+ case Attribute::ZExt:
+ case Attribute::ImmArg:
+ case Attribute::ByRef:
+ // These are not really attributes.
+ case Attribute::None:
+ case Attribute::EndAttrKinds:
+ case Attribute::EmptyKey:
+ case Attribute::TombstoneKey:
+ llvm_unreachable("Not a function attribute");
}
newFunction->addFnAttr(Attr);
}
newFunction->getBasicBlockList().push_back(newRootNode);
- // Create an iterator to name all of the arguments we inserted.
- Function::arg_iterator AI = newFunction->arg_begin();
+ // Create scalar and aggregate iterators to name all of the arguments we
+ // inserted.
+ Function::arg_iterator ScalarAI = newFunction->arg_begin();
+ Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams);
// Rewrite all users of the inputs in the extracted region to use the
// arguments (or appropriate addressing into struct) instead.
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
Value *RewriteVal;
- if (AggregateArgs) {
+ if (AggregateArgs && StructValues.contains(inputs[i])) {
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
- RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
+ StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP,
"loadgep_" + inputs[i]->getName(), TI);
+ ++aggIdx;
} else
- RewriteVal = &*AI++;
+ RewriteVal = &*ScalarAI++;
std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (User *use : Users)
@@ -1013,12 +1026,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
}
// Set names for input and output arguments.
- if (!AggregateArgs) {
- AI = newFunction->arg_begin();
- for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
- AI->setName(inputs[i]->getName());
- for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
- AI->setName(outputs[i]->getName()+".out");
+ if (NumScalarParams) {
+ ScalarAI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI)
+ if (!StructValues.contains(inputs[i]))
+ ScalarAI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI)
+ if (!StructValues.contains(outputs[i]))
+ ScalarAI->setName(outputs[i]->getName() + ".out");
}
// Rewrite branches to basic blocks outside of the loop to new dummy blocks
@@ -1126,7 +1141,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
ValueSet &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
- std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
+ std::vector<Value *> params, ReloadOutputs, Reloads;
+ ValueSet StructValues;
Module *M = newFunction->getParent();
LLVMContext &Context = M->getContext();
@@ -1134,23 +1150,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
- unsigned ArgNo = 0;
+ unsigned ScalarInputArgNo = 0;
SmallVector<unsigned, 1> SwiftErrorArgs;
for (Value *input : inputs) {
- if (AggregateArgs)
- StructValues.push_back(input);
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input))
+ StructValues.insert(input);
else {
params.push_back(input);
if (input->isSwiftError())
- SwiftErrorArgs.push_back(ArgNo);
+ SwiftErrorArgs.push_back(ScalarInputArgNo);
}
- ++ArgNo;
+ ++ScalarInputArgNo;
}
// Create allocas for the outputs
+ unsigned ScalarOutputArgNo = 0;
for (Value *output : outputs) {
- if (AggregateArgs) {
- StructValues.push_back(output);
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+ StructValues.insert(output);
} else {
AllocaInst *alloca =
new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
@@ -1158,12 +1175,14 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
&codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
+ ++ScalarOutputArgNo;
}
}
StructType *StructArgTy = nullptr;
AllocaInst *Struct = nullptr;
- if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ unsigned NumAggregatedInputs = 0;
+ if (AggregateArgs && !StructValues.empty()) {
std::vector<Type *> ArgTypes;
for (Value *V : StructValues)
ArgTypes.push_back(V->getType());
@@ -1175,14 +1194,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
&codeReplacer->getParent()->front().front());
params.push_back(Struct);
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
- codeReplacer->getInstList().push_back(GEP);
- new StoreInst(StructValues[i], GEP, codeReplacer);
+ // Store aggregated inputs in the struct.
+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
+ if (inputs.contains(StructValues[i])) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ new StoreInst(StructValues[i], GEP, codeReplacer);
+ NumAggregatedInputs++;
+ }
}
}
@@ -1205,24 +1228,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
}
- Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
- unsigned FirstOut = inputs.size();
- if (!AggregateArgs)
- std::advance(OutputArgBegin, inputs.size());
-
- // Reload the outputs passed in by reference.
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ // Reload the outputs passed in by reference: use the struct if the output
+ // is in the aggregate, otherwise reload from the scalar argument.
+ for (unsigned i = 0, e = outputs.size(), scalarIdx = 0,
+ aggIdx = NumAggregatedInputs;
+ i != e; ++i) {
Value *Output = nullptr;
- if (AggregateArgs) {
+ if (AggregateArgs && StructValues.contains(outputs[i])) {
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
+ ++aggIdx;
} else {
- Output = ReloadOutputs[i];
+ Output = ReloadOutputs[scalarIdx];
+ ++scalarIdx;
}
LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
outputs[i]->getName() + ".reload",
@@ -1304,8 +1327,13 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
// Store the arguments right after the definition of output value.
// This should be done after creating exit stubs, to ensure that the invoke
// result restore will be placed in the outlined function.
- Function::arg_iterator OAI = OutputArgBegin;
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin();
+ std::advance(ScalarOutputArgBegin, ScalarInputArgNo);
+ Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin();
+ std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo);
+
+ for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e;
+ ++i) {
auto *OutI = dyn_cast<Instruction>(outputs[i]);
if (!OutI)
continue;
@@ -1325,23 +1353,27 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
assert((InsertBefore->getFunction() == newFunction ||
Blocks.count(InsertBefore->getParent())) &&
"InsertPt should be in new function");
- assert(OAI != newFunction->arg_end() &&
- "Number of output arguments should match "
- "the amount of defined values");
- if (AggregateArgs) {
+ if (AggregateArgs && StructValues.contains(outputs[i])) {
+ assert(AggOutputArgBegin != newFunction->arg_end() &&
+ "Number of aggregate output arguments should match "
+ "the number of defined values");
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+ StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(),
InsertBefore);
new StoreInst(outputs[i], GEP, InsertBefore);
+ ++aggIdx;
// Since there should be only one struct argument aggregating
- // all the output values, we shouldn't increment OAI, which always
- // points to the struct argument, in this case.
+ // all the output values, we shouldn't increment AggOutputArgBegin, which
+ // always points to the struct argument, in this case.
} else {
- new StoreInst(outputs[i], &*OAI, InsertBefore);
- ++OAI;
+ assert(ScalarOutputArgBegin != newFunction->arg_end() &&
+ "Number of scalar output arguments should match "
+ "the number of defined values");
+ new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore);
+ ++ScalarOutputArgBegin;
}
}
@@ -1840,3 +1872,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
}
return false;
}
+
+void CodeExtractor::excludeArgFromAggregate(Value *Arg) {
+ ExcludeArgsFromAggregate.insert(Arg);
+}
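The user-facing entry point of the aggregate-splitting rework is the new excludeArgFromAggregate: with AggregateArgs set, a client can pin individual values to the scalar parameter list while everything else still travels through the struct. A usage sketch under assumed names (all hypothetical):

    #include "llvm/Transforms/Utils/CodeExtractor.h"
    using namespace llvm;

    // Hypothetical driver: extract Blocks with aggregated parameters, but keep
    // KeepScalar out of the struct so it arrives as a plain argument.
    static Function *
    extractKeepingScalar(ArrayRef<BasicBlock *> Blocks, Value *KeepScalar,
                         const CodeExtractorAnalysisCache &CEAC) {
      CodeExtractor CE(Blocks, /*DT=*/nullptr, /*AggregateArgs=*/true);
      CE.excludeArgFromAggregate(KeepScalar);
      return CE.extractCodeRegion(CEAC);
    }
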
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
index 91630d876fc8..e73287c060ae 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -122,129 +122,114 @@ isSimpleEnoughValueToCommit(Constant *C,
return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
}
-/// Return true if this constant is simple enough for us to understand. In
-/// particular, if it is a cast to anything other than from one pointer type to
-/// another pointer type, we punt. We basically just support direct accesses to
-/// globals and GEP's of globals. This should be kept up to date with
-/// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/*_odr/linkonce linkage or external globals.
- return GV->hasUniqueInitializer();
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- // Handle a constantexpr gep.
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0)) &&
- cast<GEPOperator>(CE)->isInBounds()) {
- GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- if (!GV->hasUniqueInitializer())
- return false;
+void Evaluator::MutableValue::clear() {
+ if (auto *Agg = Val.dyn_cast<MutableAggregate *>())
+ delete Agg;
+ Val = nullptr;
+}
- // The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
- if (!CI || !CI->isZero()) return false;
+Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset,
+ const DataLayout &DL) const {
+ TypeSize TySize = DL.getTypeStoreSize(Ty);
+ const MutableValue *V = this;
+ while (const auto *Agg = V->Val.dyn_cast<MutableAggregate *>()) {
+ Type *AggTy = Agg->Ty;
+ Optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
+ if (!Index || Index->uge(Agg->Elements.size()) ||
+ !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy)))
+ return nullptr;
+
+ V = &Agg->Elements[Index->getZExtValue()];
+ }
- // The remaining indices must be compile-time known integers within the
- // notional bounds of the corresponding static array types.
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
+ return ConstantFoldLoadFromConst(V->Val.get<Constant *>(), Ty, Offset, DL);
+}
- return ConstantFoldLoadThroughGEPConstantExpr(
- GV->getInitializer(), CE,
- cast<GEPOperator>(CE)->getResultElementType(), DL);
- } else if (CE->getOpcode() == Instruction::BitCast &&
- isa<GlobalVariable>(CE->getOperand(0))) {
- // A constantexpr bitcast from a pointer to another pointer is a no-op,
- // and we know how to evaluate it by moving the bitcast from the pointer
- // operand to the value operand.
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
- }
- }
+bool Evaluator::MutableValue::makeMutable() {
+ Constant *C = Val.get<Constant *>();
+ Type *Ty = C->getType();
+ unsigned NumElements;
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ NumElements = VT->getNumElements();
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty))
+ NumElements = AT->getNumElements();
+ else if (auto *ST = dyn_cast<StructType>(Ty))
+ NumElements = ST->getNumElements();
+ else
+ return false;
- return false;
+ MutableAggregate *MA = new MutableAggregate(Ty);
+ MA->Elements.reserve(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ MA->Elements.push_back(C->getAggregateElement(I));
+ Val = MA;
+ return true;
}
-/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the
-/// pointer's type and walk down through the initial elements to obtain
-/// additional pointers to try. Returns the first non-null return value from
-/// \p TryLoad, or \p nullptr if the type can't be introspected further.
-static Constant *
-evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- std::function<Constant *(Constant *)> TryLoad) {
- Constant *Val;
- while (!(Val = TryLoad(Ptr))) {
- // If Ty is a non-opaque struct, we can convert the pointer to the struct
- // into a pointer to its first member.
- // FIXME: This could be extended to support arrays as well.
- Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
- if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque())
- break;
-
- IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
- Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
- Constant *const IdxList[] = {IdxZero, IdxZero};
-
- Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
- Ptr = ConstantFoldConstant(Ptr, DL, TLI);
+bool Evaluator::MutableValue::write(Constant *V, APInt Offset,
+ const DataLayout &DL) {
+ Type *Ty = V->getType();
+ TypeSize TySize = DL.getTypeStoreSize(Ty);
+ MutableValue *MV = this;
+ while (Offset != 0 ||
+ !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) {
+ if (MV->Val.is<Constant *>() && !MV->makeMutable())
+ return false;
+
+ MutableAggregate *Agg = MV->Val.get<MutableAggregate *>();
+ Type *AggTy = Agg->Ty;
+ Optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
+ if (!Index || Index->uge(Agg->Elements.size()) ||
+ !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy)))
+ return false;
+
+ MV = &Agg->Elements[Index->getZExtValue()];
}
- return Val;
+
+ Type *MVType = MV->getType();
+ MV->clear();
+ if (Ty->isIntegerTy() && MVType->isPointerTy())
+ MV->Val = ConstantExpr::getIntToPtr(V, MVType);
+ else if (Ty->isPointerTy() && MVType->isIntegerTy())
+ MV->Val = ConstantExpr::getPtrToInt(V, MVType);
+ else if (Ty != MVType)
+ MV->Val = ConstantExpr::getBitCast(V, MVType);
+ else
+ MV->Val = V;
+ return true;
}
-static Constant *getInitializer(Constant *C) {
- auto *GV = dyn_cast<GlobalVariable>(C);
- return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
+Constant *Evaluator::MutableAggregate::toConstant() const {
+ SmallVector<Constant *, 32> Consts;
+ for (const MutableValue &MV : Elements)
+ Consts.push_back(MV.toConstant());
+
+ if (auto *ST = dyn_cast<StructType>(Ty))
+ return ConstantStruct::get(ST, Consts);
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
+ return ConstantArray::get(AT, Consts);
+ assert(isa<FixedVectorType>(Ty) && "Must be vector");
+ return ConstantVector::get(Consts);
}
/// Return the value that would be computed by a load from P after the stores
/// reflected by 'memory' have been performed. If we can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) {
- // If this memory location has been recently stored, use the stored value: it
- // is the most up-to-date.
- auto TryFindMemLoc = [this](Constant *Ptr) {
- return MutatedMemory.lookup(Ptr);
- };
-
- if (Constant *Val = TryFindMemLoc(P))
- return Val;
-
- // Access it.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
- if (GV->hasDefinitiveInitializer())
- return GV->getInitializer();
+ APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0);
+ P = cast<Constant>(P->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType()));
+ auto *GV = dyn_cast<GlobalVariable>(P);
+ if (!GV)
return nullptr;
- }
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) {
- switch (CE->getOpcode()) {
- // Handle a constantexpr getelementptr.
- case Instruction::GetElementPtr:
- if (auto *I = getInitializer(CE->getOperand(0)))
- return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL);
- break;
- // Handle a constantexpr bitcast.
- case Instruction::BitCast:
- // We're evaluating a load through a pointer that was bitcast to a
- // different type. See if the "from" pointer has recently been stored.
- // If it hasn't, we may still be able to find a stored pointer by
- // introspecting the type.
- Constant *Val =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc);
- if (!Val)
- Val = getInitializer(CE->getOperand(0));
- if (Val)
- return ConstantFoldLoadThroughBitcast(
- Val, P->getType()->getPointerElementType(), DL);
- break;
- }
- }
+ auto It = MutatedMemory.find(GV);
+ if (It != MutatedMemory.end())
+ return It->second.read(Ty, Offset, DL);
- return nullptr; // don't know how to evaluate.
+ if (!GV->hasDefinitiveInitializer())
+ return nullptr;
+ return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
}
static Function *getFunction(Constant *C) {
@@ -260,17 +245,10 @@ static Function *getFunction(Constant *C) {
Function *
Evaluator::getCalleeWithFormalArgs(CallBase &CB,
SmallVectorImpl<Constant *> &Formals) {
- auto *V = CB.getCalledOperand();
+ auto *V = CB.getCalledOperand()->stripPointerCasts();
if (auto *Fn = getFunction(getVal(V)))
return getFormalParams(CB, Fn, Formals) ? Fn : nullptr;
-
- auto *CE = dyn_cast<ConstantExpr>(V);
- if (!CE || CE->getOpcode() != Instruction::BitCast ||
- !getFormalParams(CB, getFunction(CE->getOperand(0)), Formals))
- return nullptr;
-
- return dyn_cast<Function>(
- ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
+ return nullptr;
}
bool Evaluator::getFormalParams(CallBase &CB, Function *F,
@@ -299,17 +277,13 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F,
/// If call expression contains bitcast then we may need to cast
/// evaluated return value to a type of the call expression.
-Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
- if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
+Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) {
+ if (!RV || RV->getType() == ReturnType)
return RV;
- if (auto *FT =
- dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
- RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
- if (!RV)
- LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
- }
+ RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL);
+ if (!RV)
+ LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
return RV;
}
@@ -337,68 +311,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
- // Conservatively, avoid aggregate types. This is because we don't
- // want to worry about them partially overlapping other stores.
- if (!SI->getValueOperand()->getType()->isSingleValueType() ||
- !isSimpleEnoughPointerToCommit(Ptr, DL)) {
- // If this is too complex for us to commit, reject it.
- LLVM_DEBUG(
- dbgs() << "Pointer is too complex for us to evaluate store.");
+
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType()));
+ auto *GV = dyn_cast<GlobalVariable>(Ptr);
+ if (!GV || !GV->hasUniqueInitializer()) {
+ LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: "
+ << *Ptr << "\n");
return false;
}
- Constant *Val = getVal(SI->getOperand(0));
-
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
+ Constant *Val = getVal(SI->getOperand(0));
if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. "
<< *Val << "\n");
return false;
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- LLVM_DEBUG(dbgs()
- << "Attempting to resolve bitcast on constant ptr.\n");
- // If we're evaluating a store through a bitcast, then we need
- // to pull the bitcast off the pointer type and push it onto the
- // stored value. In order to push the bitcast onto the stored value,
- // a bitcast from the pointer's element type to Val's type must be
- // legal. If it's not, we can try introspecting the type to find a
- // legal conversion.
-
- auto TryCastValTy = [&](Constant *P) -> Constant * {
- // The conversion is illegal if the store is wider than the
- // pointee proposed by `evaluateBitcastFromPtr`, since that would
- // drop stores to other struct elements when the caller attempts to
- // look through a struct's 0th element.
- Type *NewTy = cast<PointerType>(P->getType())->getElementType();
- Type *STy = Val->getType();
- if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy))
- return nullptr;
-
- if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) {
- Ptr = P;
- return FV;
- }
- return nullptr;
- };
-
- Constant *NewVal =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy);
- if (!NewVal) {
- LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
- return false;
- }
-
- Val = NewVal;
- LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
- }
- }
-
- MutatedMemory[Ptr] = Val;
+ auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer());
+ if (!Res.first->second.write(Val, Offset, DL))
+ return false;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
getVal(BO->getOperand(0)),
@@ -593,7 +529,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
- InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
+ InstResult = castCallResultIfNeeded(CB.getType(), C);
if (!InstResult)
return false;
LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
@@ -617,7 +553,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
return false;
}
ValueStack.pop_back();
- InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
+ InstResult = castCallResultIfNeeded(CB.getType(), RetVal);
if (RetVal && !InstResult)
return false;
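After this rewrite, loads and stores share one addressing model: strip casts and GEPs off the constant pointer while accumulating a byte offset, then resolve the access against the underlying global (through the MutatedMemory map of MutableValue trees, or the initializer). A sketch of just the shared resolution step (hypothetical helper name):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    // Hypothetical helper: peel casts/GEPs off a constant pointer, accumulate
    // the byte offset, and recover the global the access ultimately hits.
    static GlobalVariable *resolveGlobal(Constant *Ptr, const DataLayout &DL,
                                         APInt &Offset) {
      Offset = APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Constant *Base = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
          DL, Offset, /*AllowNonInbounds=*/true));
      return dyn_cast<GlobalVariable>(Base);
    }
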
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 9bfc73e4ba6c..f8ec8c6ad426 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -66,8 +66,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
- GS.HasNonInstructionUser = true;
-
// If the result of the constantexpr isn't pointer type, then we won't
// know to expect it in various places. Just reject early.
if (!isa<PointerType>(CE->getType()))
@@ -105,9 +103,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
// value, not an aggregate), keep more specific information about
// stores.
if (GS.StoredType != GlobalStatus::Stored) {
- const Value *Ptr = SI->getPointerOperand();
- if (isa<ConstantExpr>(Ptr))
- Ptr = Ptr->stripPointerCasts();
+ const Value *Ptr = SI->getPointerOperand()->stripPointerCasts();
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Value *StoredVal = SI->getOperand(0);
@@ -174,12 +170,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return true; // Any other non-load instruction might take address!
}
} else if (const Constant *C = dyn_cast<Constant>(UR)) {
- GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!isSafeToDestroyConstant(C))
return true;
} else {
- GS.HasNonInstructionUser = true;
// Otherwise must be some other user.
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 997667810580..c9f872f5b7e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1185,10 +1185,10 @@ static bool MayContainThrowingOrExitingCall(Instruction *Begin,
static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
- AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex);
- if (AB.empty())
+ AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs());
+ if (!AB.hasAttributes())
return AB;
- AttrBuilder Valid;
+ AttrBuilder Valid(CB.getContext());
// Only allow these white listed attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.
@@ -1208,7 +1208,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
return;
AttrBuilder Valid = IdentifyValidAttributes(CB);
- if (Valid.empty())
+ if (!Valid.hasAttributes())
return;
auto *CalledFunction = CB.getCalledFunction();
auto &Context = CalledFunction->getContext();
@@ -1667,7 +1667,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
Module *Mod = CB.getModule();
assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
- IsClaimRV = !IsRetainRV;
+ IsUnsafeClaimRV = !IsRetainRV;
for (auto *RI : Returns) {
Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
@@ -1694,7 +1694,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
// and erase the autoreleaseRV call.
// - If retainRV is attached to the call, just erase the autoreleaseRV
// call.
- if (IsClaimRV) {
+ if (IsUnsafeClaimRV) {
Builder.SetInsertPoint(II);
Function *IFn =
Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index 668626fef933..72b864dc3e48 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -339,8 +339,10 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
#ifdef EXPENSIVE_CHECKS
// Verify all sub-loops are in LCSSA form already.
- for (Loop *SubLoop: L)
+ for (Loop *SubLoop: L) {
+ (void)SubLoop; // Silence unused variable warning.
assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+ }
#endif
SmallVector<BasicBlock *, 8> ExitBlocks;
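The added (void)SubLoop cast exists because assert compiles away under NDEBUG, leaving the loop variable otherwise unused and triggering -Wunused-variable on clang. The same pattern in a self-contained form (generic example, not LLVM code):

    #include <cassert>
    #include <vector>

    void checkAllPositive(const std::vector<int> &Values) {
      for (int V : Values) {
        (void)V; // used only by the assert; keeps NDEBUG builds warning-clean
        assert(V > 0 && "expected positive values");
      }
    }
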
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index ecad79b68185..9f33d2f82732 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -492,7 +492,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
}
}
- if (isAllocLikeFn(I, TLI))
+ if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI))
return true;
if (CallInst *CI = isFreeCall(I, TLI))
@@ -2189,8 +2189,8 @@ CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
return NewCall;
}
-/// changeToCall - Convert the specified invoke into a normal call.
-void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
+// changeToCall - Convert the specified invoke into a normal call.
+CallInst *llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
CallInst *NewCall = createCallMatchingInvoke(II);
NewCall->takeName(II);
NewCall->insertBefore(II);
@@ -2207,6 +2207,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
II->eraseFromParent();
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
+ return NewCall;
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -3147,11 +3148,6 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
return false; // Can't do integer/elements > 128 bits.
- Type *DemandedTy = ITy;
- if (I->hasOneUse())
- if (auto *Trunc = dyn_cast<TruncInst>(I->user_back()))
- DemandedTy = Trunc->getType();
-
// Try to find all the pieces corresponding to the bswap.
bool FoundRoot = false;
std::map<Value *, Optional<BitPart>> BPS;
@@ -3165,6 +3161,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
"Illegal bit provenance index");
// If the upper bits are zero, then attempt to perform as a truncated op.
+ Type *DemandedTy = ITy;
if (BitProvenance.back() == BitPart::Unset) {
while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset)
BitProvenance = BitProvenance.drop_back();
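changeToCall now returns the CallInst it creates, so a caller can keep operating on the replacement without re-scanning the block for it. A hypothetical caller (illustrative only):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Hypothetical caller: the invoke is erased and the returned CallInst is
    // the inserted replacement, ready for further adjustment.
    static void devirtualizeInvoke(InvokeInst *II, DomTreeUpdater *DTU) {
      CallInst *NewCall = changeToCall(II, DTU);
      NewCall->setTailCall(); // illustrative follow-up use of the return value
    }
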
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 69fd110dc3c2..92333408aaef 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -359,7 +359,7 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, DominatorTree &DT,
+ unsigned TripCount, DominatorTree &DT,
ScalarEvolution &SE, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
@@ -370,7 +370,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
// Only try to peel innermost loops by default.
- // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
+ // The constraint can be relaxed by the target in TTI.getPeelingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
return;
@@ -407,8 +407,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance;
// Now go through all Phis to calculate the number of iterations they
// need to become invariants.
- // Start the max computation with the UP.PeelCount value set by the target
- // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ // Start the max computation with the PP.PeelCount value set by the target
+ // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
unsigned DesiredPeelCount = TargetPeelCount;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b0c622b98d5e..9ca1f4f44b97 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -99,6 +99,17 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
#endif
);
+static cl::opt<bool>
+UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
+ cl::desc("Verify loopinfo after unrolling"),
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true)
+#else
+ cl::init(false)
+#endif
+ );
+
+
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
/// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -764,6 +775,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Apply updates to the DomTree.
DT = &DTU.getDomTree();
+ assert(!UnrollVerifyDomtree ||
+ DT->verify(DominatorTree::VerificationLevel::Fast));
+
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
@@ -777,6 +791,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (CompletelyUnroll)
LI->erase(L);
+ // LoopInfo should now be valid, confirm that.
+ if (UnrollVerifyLoopInfo)
+ LI->verify(*DT);
+
// After complete unrolling most of the blocks should be contained in OuterL.
// However, some of them might happen to be out of OuterL (e.g. if they
// precede a loop exit). In this case we might need to insert PHI nodes in
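The new -unroll-verify-loopinfo flag mirrors -unroll-verify-domtree: it defaults to on only under EXPENSIVE_CHECKS and can be forced on from the opt command line. A sketch of the check the guard runs when enabled (assuming valid LoopInfo and DominatorTree references):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // What the guard runs when enabled: LI.verify asserts internally if
    // LoopInfo disagrees with the IR or the dominator tree.
    static void verifyLoopAnalyses(const LoopInfo &LI, const DominatorTree &DT) {
      LI.verify(DT);
    }
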
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 93157bd87c34..95db2fe8d310 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -1567,7 +1568,9 @@ Value *llvm::addRuntimeChecks(
auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
LLVMContext &Ctx = Loc->getContext();
- IRBuilder<> ChkBuilder(Loc);
+ IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
+ Loc->getModule()->getDataLayout());
+ ChkBuilder.SetInsertPoint(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
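Using IRBuilder<InstSimplifyFolder> means every instruction created for a runtime check is handed to InstSimplify first, so checks that fold to constants never materialize in the IR. A minimal sketch of the pattern (hypothetical emitter, mirroring the construction above and in LoopVersioning.cpp below):

    #include "llvm/Analysis/InstSimplifyFolder.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Hypothetical emitter: the InstSimplifyFolder-backed builder folds each
    // created instruction, so trivially-true checks never reach the IR.
    static Value *emitOverlapCheck(Instruction *Loc, Value *AEnd, Value *BStart) {
      const DataLayout &DL = Loc->getModule()->getDataLayout();
      IRBuilder<InstSimplifyFolder> B(Loc->getContext(), InstSimplifyFolder(DL));
      B.SetInsertPoint(Loc);
      return B.CreateICmpULE(AEnd, BStart, "no.overlap");
    }
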
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 771b7d25b0f2..f0bf625fa18e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -70,17 +71,14 @@ void LoopVersioning::versionLoop(
"scev.check");
SCEVRuntimeCheck =
Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator());
- auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
-
- // Discard the SCEV runtime check if it is always true.
- if (CI && CI->isZero())
- SCEVRuntimeCheck = nullptr;
+ IRBuilder<InstSimplifyFolder> Builder(
+ RuntimeCheckBB->getContext(),
+ InstSimplifyFolder(RuntimeCheckBB->getModule()->getDataLayout()));
if (MemRuntimeCheck && SCEVRuntimeCheck) {
- RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
- SCEVRuntimeCheck, "lver.safe");
- if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
- I->insertBefore(RuntimeCheckBB->getTerminator());
+ Builder.SetInsertPoint(RuntimeCheckBB->getTerminator());
+ RuntimeCheck =
+ Builder.CreateOr(MemRuntimeCheck, SCEVRuntimeCheck, "lver.safe");
} else
RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
@@ -109,8 +107,9 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
- BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
+ Builder.SetInsertPoint(OrigTerm);
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 8dc4702993c3..3d75dd57456d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -297,7 +297,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Function *F = OrigBB->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+ Type *EltTy = SrcAddr->getType()->getPointerElementType();
// Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index bb5ff59cba4b..7c9ab7f6ca2c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -178,66 +178,30 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
}
void llvm::filterDeadComdatFunctions(
- Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
- // Build a map from the comdat to the number of entries in that comdat we
- // think are dead. If this fully covers the comdat group, then the entire
- // group is dead. If we find another entry in the comdat group though, we'll
- // have to preserve the whole group.
- SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
+ SmallVectorImpl<Function *> &DeadComdatFunctions) {
+ SmallPtrSet<Function *, 32> MaybeDeadFunctions;
+ SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
for (Function *F : DeadComdatFunctions) {
- Comdat *C = F->getComdat();
- assert(C && "Expected all input GVs to be in a comdat!");
- ComdatEntriesCovered[C] += 1;
+ MaybeDeadFunctions.insert(F);
+ if (Comdat *C = F->getComdat())
+ MaybeDeadComdats.insert(C);
}
- auto CheckComdat = [&](Comdat &C) {
- auto CI = ComdatEntriesCovered.find(&C);
- if (CI == ComdatEntriesCovered.end())
- return;
-
- // If this could have been covered by a dead entry, just subtract one to
- // account for it.
- if (CI->second > 0) {
- CI->second -= 1;
- return;
- }
-
- // If we've already accounted for all the entries that were dead, the
- // entire comdat is alive so remove it from the map.
- ComdatEntriesCovered.erase(CI);
- };
-
- auto CheckAllComdats = [&] {
- for (Function &F : M.functions())
- if (Comdat *C = F.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- for (GlobalVariable &GV : M.globals())
- if (Comdat *C = GV.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- for (GlobalAlias &GA : M.aliases())
- if (Comdat *C = GA.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- };
- CheckAllComdats();
-
- if (ComdatEntriesCovered.empty()) {
- DeadComdatFunctions.clear();
- return;
+ // Find comdats for which all users are dead now.
+ SmallPtrSet<Comdat *, 32> DeadComdats;
+ for (Comdat *C : MaybeDeadComdats) {
+ auto IsUserDead = [&](GlobalObject *GO) {
+ auto *F = dyn_cast<Function>(GO);
+ return F && MaybeDeadFunctions.contains(F);
+ };
+ if (all_of(C->getUsers(), IsUserDead))
+ DeadComdats.insert(C);
}
- // Remove the entries that were not covering.
- erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
- return ComdatEntriesCovered.find(GV->getComdat()) ==
- ComdatEntriesCovered.end();
+ // Only keep functions which have no comdat or a dead comdat.
+ erase_if(DeadComdatFunctions, [&](Function *F) {
+ Comdat *C = F->getComdat();
+ return C && !DeadComdats.contains(C);
});
}
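The rewrite drops the whole-module scan: a comdat is dead exactly when every one of its users is itself a candidate function, and the candidate list is then filtered against the dead-comdat set. A standalone model of that set logic in plain C++ (Function and Comdat here are simplified stand-ins, not the LLVM classes; in the real code a non-function comdat user keeps the comdat alive):

    #include <algorithm>
    #include <unordered_set>
    #include <vector>

    struct Comdat;
    struct Function { Comdat *C = nullptr; };
    struct Comdat { std::vector<Function *> Users; };

    void filterDeadComdatFunctions(std::vector<Function *> &Dead) {
      std::unordered_set<Function *> MaybeDead(Dead.begin(), Dead.end());
      std::unordered_set<Comdat *> DeadComdats;
      for (Function *F : Dead)
        if (F->C && std::all_of(F->C->Users.begin(), F->C->Users.end(),
                                [&](Function *U) {
                                  return MaybeDead.count(U) != 0;
                                }))
          DeadComdats.insert(F->C);
      // Keep only functions with no comdat or a fully dead comdat.
      Dead.erase(std::remove_if(Dead.begin(), Dead.end(),
                                [&](Function *F) {
                                  return F->C && DeadComdats.count(F->C) == 0;
                                }),
                 Dead.end());
    }

The cost is proportional to the candidates and their comdat users rather than to the module size, which matters when only a handful of comdat functions die.
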
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 2f2dff6b5f0b..961adf2570a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SampleProfileInference.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
@@ -144,7 +145,7 @@ public:
/// A cost of decreasing the entry block's count by one.
static constexpr int64_t AuxCostDecEntry = 10;
/// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
+ static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
private:
/// Check for existence of an augmenting path with a positive capacity.
@@ -236,7 +237,7 @@ private:
}
}
- /// An node in a flow network.
+ /// A node in a flow network.
struct Node {
/// The cost of the cheapest path from the source to the current node.
int64_t Distance;
@@ -303,13 +304,10 @@ public:
rebalanceUnknownSubgraphs();
}
- /// The probability for the first successor of a unknown subgraph
- static constexpr double UnknownFirstSuccProbability = 0.5;
-
private:
void joinIsolatedComponents() {
// Find blocks that are reachable from the source
- auto Visited = std::vector<bool>(NumBlocks(), false);
+ auto Visited = BitVector(NumBlocks(), false);
findReachable(Func.Entry, Visited);
// Iterate over all non-reachable blocks and adjust their weights
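llvm::BitVector replaces std::vector<bool> throughout this file; it stores the bits in machine words and sidesteps vector<bool>'s proxy-iterator quirks. A minimal sketch of the BFS marking pattern used by findReachable, simplified to an adjacency list rather than FlowJump records:

    #include "llvm/ADT/BitVector.h"
    #include <cstdint>
    #include <queue>
    #include <vector>

    void markReachable(uint64_t Src,
                       const std::vector<std::vector<uint64_t>> &Succs,
                       llvm::BitVector &Visited) {
      std::queue<uint64_t> Queue;
      Queue.push(Src);
      Visited[Src] = true; // operator[] yields an assignable reference
      while (!Queue.empty()) {
        uint64_t U = Queue.front();
        Queue.pop();
        for (uint64_t V : Succs[U])
          if (!Visited[V]) {
            Visited[V] = true;
            Queue.push(V);
          }
      }
    }
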
@@ -334,7 +332,7 @@ private:
/// Run BFS from a given block along the jumps with a positive flow and mark
/// all reachable blocks.
- void findReachable(uint64_t Src, std::vector<bool> &Visited) {
+ void findReachable(uint64_t Src, BitVector &Visited) {
if (Visited[Src])
return;
std::queue<uint64_t> Queue;
@@ -452,44 +450,70 @@ private:
uint64_t NumBlocks() const { return Func.Blocks.size(); }
- /// Rebalance unknown subgraphs so as each branch splits with probabilities
- /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
+ /// Rebalance unknown subgraphs so that the flow is split evenly across the
+ /// outgoing branches of every block of the subgraph. The method iterates over
+  /// blocks with known weight and identifies unknown subgraphs rooted at those
+  /// blocks. Then it checks whether flow rebalancing is feasible and applies it.
void rebalanceUnknownSubgraphs() {
- assert(UnknownFirstSuccProbability >= 0.0 &&
- UnknownFirstSuccProbability <= 1.0 &&
- "the share of the unknown successor should be between 0 and 1");
- // Try to find unknown subgraphs from each non-unknown block
+ // Try to find unknown subgraphs from each block
for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
auto SrcBlock = &Func.Blocks[I];
- // Do not attempt to find unknown successors from a unknown or a
- // zero-flow block
- if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ // Verify if rebalancing rooted at SrcBlock is feasible
+ if (!canRebalanceAtRoot(SrcBlock))
continue;
- std::vector<FlowBlock *> UnknownSuccs;
+      // Find an unknown subgraph starting at SrcBlock. Along the way,
+ // fill in known destinations and intermediate unknown blocks.
+ std::vector<FlowBlock *> UnknownBlocks;
+ std::vector<FlowBlock *> KnownDstBlocks;
+ findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
+
+ // Verify if rebalancing of the subgraph is feasible. If the search is
+ // successful, find the unique destination block (which can be null)
FlowBlock *DstBlock = nullptr;
- // Find a unknown subgraphs starting at block SrcBlock
- if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+ if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
+ DstBlock))
continue;
- // At the moment, we do not rebalance subgraphs containing cycles among
- // unknown blocks
- if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+
+ // We cannot rebalance subgraphs containing cycles among unknown blocks
+ if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
continue;
// Rebalance the flow
- rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
+ rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
}
}
- /// Find a unknown subgraph starting at block SrcBlock.
- /// If the search is successful, the method sets DstBlock and UnknownSuccs.
- bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+ /// Verify if rebalancing rooted at a given block is possible.
+ bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
+ // Do not attempt to find unknown subgraphs from an unknown or a
+ // zero-flow block
+ if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ return false;
+
+    // Do not attempt to process subgraphs from a block w/o unknown successors
+ bool HasUnknownSuccs = false;
+ for (auto Jump : SrcBlock->SuccJumps) {
+ if (Func.Blocks[Jump->Target].UnknownWeight) {
+ HasUnknownSuccs = true;
+ break;
+ }
+ }
+ if (!HasUnknownSuccs)
+ return false;
+
+ return true;
+ }
+
+ /// Find an unknown subgraph starting at block SrcBlock. The method sets
+ /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
+ void findUnknownSubgraph(const FlowBlock *SrcBlock,
+ std::vector<FlowBlock *> &KnownDstBlocks,
+ std::vector<FlowBlock *> &UnknownBlocks) {
// Run BFS from SrcBlock and make sure all paths are going through unknown
// blocks and end at a non-unknown DstBlock
- auto Visited = std::vector<bool>(NumBlocks(), false);
+ auto Visited = BitVector(NumBlocks(), false);
std::queue<uint64_t> Queue;
- DstBlock = nullptr;
Queue.push(SrcBlock->Index);
Visited[SrcBlock->Index] = true;
@@ -498,52 +522,105 @@ private:
Queue.pop();
// Process blocks reachable from Block
for (auto Jump : Block.SuccJumps) {
+ // If Jump can be ignored, skip it
+ if (ignoreJump(SrcBlock, nullptr, Jump))
+ continue;
+
uint64_t Dst = Jump->Target;
+ // If Dst has been visited, skip Jump
if (Visited[Dst])
continue;
+ // Process block Dst
Visited[Dst] = true;
if (!Func.Blocks[Dst].UnknownWeight) {
- // If we see non-unique non-unknown block reachable from SrcBlock,
- // stop processing and skip rebalancing
- FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
- if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
- return false;
- DstBlock = CandidateDstBlock;
+ KnownDstBlocks.push_back(&Func.Blocks[Dst]);
} else {
Queue.push(Dst);
- UnknownSuccs.push_back(&Func.Blocks[Dst]);
+ UnknownBlocks.push_back(&Func.Blocks[Dst]);
}
}
}
+ }
+ /// Verify if rebalancing of the subgraph is feasible. If the checks are
+ /// successful, set the unique destination block, DstBlock (can be null).
+ bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
+ const std::vector<FlowBlock *> &KnownDstBlocks,
+ const std::vector<FlowBlock *> &UnknownBlocks,
+ FlowBlock *&DstBlock) {
// If the list of unknown blocks is empty, we don't need rebalancing
- if (UnknownSuccs.empty())
+ if (UnknownBlocks.empty())
return false;
- // If all reachable nodes from SrcBlock are unknown, skip rebalancing
- if (DstBlock == nullptr)
+
+ // If there are multiple known sinks, we can't rebalance
+ if (KnownDstBlocks.size() > 1)
return false;
- // If any of the unknown blocks is an exit block, skip rebalancing
- for (auto Block : UnknownSuccs) {
- if (Block->isExit())
+ DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
+
+ // Verify sinks of the subgraph
+ for (auto Block : UnknownBlocks) {
+ if (Block->SuccJumps.empty()) {
+ // If there are multiple (known and unknown) sinks, we can't rebalance
+ if (DstBlock != nullptr)
+ return false;
+ continue;
+ }
+ size_t NumIgnoredJumps = 0;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ NumIgnoredJumps++;
+ }
+ // If there is a non-sink block in UnknownBlocks with all jumps ignored,
+ // then we can't rebalance
+ if (NumIgnoredJumps == Block->SuccJumps.size())
return false;
}
return true;
}
+  /// Decide whether the Jump is ignored while processing an unknown subgraph
+ /// rooted at basic block SrcBlock with the destination block, DstBlock.
+ bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowJump *Jump) {
+ // Ignore unlikely jumps with zero flow
+ if (Jump->IsUnlikely && Jump->Flow == 0)
+ return true;
+
+ auto JumpSource = &Func.Blocks[Jump->Source];
+ auto JumpTarget = &Func.Blocks[Jump->Target];
+
+ // Do not ignore jumps coming into DstBlock
+ if (DstBlock != nullptr && JumpTarget == DstBlock)
+ return false;
+
+ // Ignore jumps out of SrcBlock to known blocks
+ if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
+ return true;
+
+ // Ignore jumps to known blocks with zero flow
+ if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
+ return true;
+
+ return false;
+ }
+
/// Verify if the given unknown subgraph is acyclic, and if yes, reorder
- /// UnknownSuccs in the topological order (so that all jumps are "forward").
- bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+ /// UnknownBlocks in the topological order (so that all jumps are "forward").
+ bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ std::vector<FlowBlock *> &UnknownBlocks) {
// Extract local in-degrees in the considered subgraph
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
- for (auto Jump : SrcBlock->SuccJumps) {
- LocalInDegree[Jump->Target]++;
- }
- for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
- for (auto Jump : UnknownSuccs[I]->SuccJumps) {
+ auto fillInDegree = [&](const FlowBlock *Block) {
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
LocalInDegree[Jump->Target]++;
}
+ };
+ fillInDegree(SrcBlock);
+ for (auto Block : UnknownBlocks) {
+ fillInDegree(Block);
}
// A loop containing SrcBlock
if (LocalInDegree[SrcBlock->Index] > 0)
@@ -553,15 +630,20 @@ private:
std::queue<uint64_t> Queue;
Queue.push(SrcBlock->Index);
while (!Queue.empty()) {
- auto &Block = Func.Blocks[Queue.front()];
+ FlowBlock *Block = &Func.Blocks[Queue.front()];
Queue.pop();
- // Stop propagation once we reach DstBlock
- if (Block.Index == DstBlock->Index)
+ // Stop propagation once we reach DstBlock, if any
+ if (DstBlock != nullptr && Block == DstBlock)
break;
- AcyclicOrder.push_back(&Block);
+ // Keep an acyclic order of unknown blocks
+ if (Block->UnknownWeight && Block != SrcBlock)
+ AcyclicOrder.push_back(Block);
+
// Add to the queue all successors with zero local in-degree
- for (auto Jump : Block.SuccJumps) {
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
uint64_t Dst = Jump->Target;
LocalInDegree[Dst]--;
if (LocalInDegree[Dst] == 0) {
@@ -572,42 +654,69 @@ private:
// If there is a cycle in the subgraph, AcyclicOrder contains only a subset
// of all blocks
- if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
+ if (UnknownBlocks.size() != AcyclicOrder.size())
return false;
- UnknownSuccs = AcyclicOrder;
+ UnknownBlocks = AcyclicOrder;
return true;
}
- /// Rebalance a given subgraph.
- void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock, and
+  /// having UnknownBlocks as the intermediate blocks.
+ void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
+ const FlowBlock *DstBlock,
+ const std::vector<FlowBlock *> &UnknownBlocks) {
assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
- assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
- for (auto Block : UnknownSuccs) {
+    // Distribute flow from the source block
+ uint64_t BlockFlow = 0;
+ // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
+ for (auto Jump : SrcBlock->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockFlow += Jump->Flow;
+ }
+ rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
+
+    // Distribute flow from the remaining blocks
+ for (auto Block : UnknownBlocks) {
+ assert(Block->UnknownWeight && "incorrect unknown subgraph");
+ uint64_t BlockFlow = 0;
// Block's flow is the sum of incoming flows
- uint64_t TotalFlow = 0;
- if (Block == SrcBlock) {
- TotalFlow = Block->Flow;
- } else {
- for (auto Jump : Block->PredJumps) {
- TotalFlow += Jump->Flow;
- }
- Block->Flow = TotalFlow;
+ for (auto Jump : Block->PredJumps) {
+ BlockFlow += Jump->Flow;
}
+ Block->Flow = BlockFlow;
+ rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
+ }
+ }
- // Process all successor jumps and update corresponding flow values
- for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
- auto Jump = Block->SuccJumps[I];
- if (I + 1 == Block->SuccJumps.size()) {
- Jump->Flow = TotalFlow;
- continue;
- }
- uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
- Jump->Flow = Flow;
- TotalFlow -= Flow;
- }
+  /// Redistribute flow for a block in a subgraph rooted at SrcBlock and
+  /// ending at DstBlock.
+ void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowBlock *Block, uint64_t BlockFlow) {
+ // Process all successor jumps and update corresponding flow values
+ size_t BlockDegree = 0;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockDegree++;
+ }
+ // If all successor jumps of the block are ignored, skip it
+ if (DstBlock == nullptr && BlockDegree == 0)
+ return;
+ assert(BlockDegree > 0 && "all outgoing jumps are ignored");
+
+ // Each of the Block's successors gets the following amount of flow.
+    // The value is rounded up so that all flow is propagated
+ uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ uint64_t Flow = std::min(SuccFlow, BlockFlow);
+ Jump->Flow = Flow;
+ BlockFlow -= Flow;
}
+ assert(BlockFlow == 0 && "not all flow is propagated");
}
/// A constant indicating an arbitrary exit block of a function.
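The split in rebalanceBlock hands each non-ignored jump a rounded-up quota clamped by the remaining flow, so earlier jumps may receive one unit more while the total is conserved: flow 10 over 3 jumps yields shares 4, 4, 2. A standalone model of the arithmetic:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> splitFlow(uint64_t BlockFlow, size_t BlockDegree) {
      std::vector<uint64_t> Shares;
      // Round up so that all flow is handed out by the last jump.
      uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
      for (size_t I = 0; I < BlockDegree; I++) {
        uint64_t Flow = std::min<uint64_t>(SuccFlow, BlockFlow);
        Shares.push_back(Flow);
        BlockFlow -= Flow;
      }
      assert(BlockFlow == 0 && "not all flow is propagated");
      return Shares; // splitFlow(10, 3) == {4, 4, 2}
    }
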
@@ -799,7 +908,7 @@ void verifyWeights(const FlowFunction &Func) {
// Run BFS from the source along edges with positive flow
std::queue<uint64_t> Queue;
- auto Visited = std::vector<bool>(NumBlocks, false);
+ auto Visited = BitVector(NumBlocks, false);
Queue.push(Func.Entry);
Visited[Func.Entry] = true;
while (!Queue.empty()) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index c840ee85795f..5363a851fc27 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -173,7 +173,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
auto *PtrTy = cast<PointerType>(Ty);
if (DL.isNonIntegralPointerType(PtrTy)) {
auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
- assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
+ assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
"alloc size of i8 must by 1 byte for the GEP to be correct");
auto *GEP = Builder.CreateGEP(
Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
@@ -471,7 +471,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// indexes into the array implied by the pointer operand; the rest of
// the indices index into the element or field type selected by the
// preceding index.
- Type *ElTy = PTy->getElementType();
+ Type *ElTy = PTy->getNonOpaquePointerElementType();
for (;;) {
// If the scale size is not 0, attempt to factor out a scale for
// array indexing.
@@ -640,8 +640,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices,
- "scevgep");
+ Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
+ Casted, GepIndices, "scevgep");
Ops.push_back(SE.getUnknown(GEP));
}
@@ -1671,7 +1671,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
return Builder.CreateSExt(V, Ty);
}
-Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+Value *SCEVExpander::expandSMaxExpr(const SCEVNAryExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
Type *Ty = LHS->getType();
for (int i = S->getNumOperands()-2; i >= 0; --i) {
@@ -1700,7 +1700,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
return LHS;
}
-Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+Value *SCEVExpander::expandUMaxExpr(const SCEVNAryExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
Type *Ty = LHS->getType();
for (int i = S->getNumOperands()-2; i >= 0; --i) {
@@ -1729,7 +1729,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
return LHS;
}
-Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+Value *SCEVExpander::expandSMinExpr(const SCEVNAryExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
@@ -1758,7 +1758,7 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
return LHS;
}
-Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+Value *SCEVExpander::expandUMinExpr(const SCEVNAryExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
@@ -1787,6 +1787,40 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
return LHS;
}
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ return expandSMaxExpr(S);
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ return expandUMaxExpr(S);
+}
+
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+ return expandSMinExpr(S);
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+ return expandUMinExpr(S);
+}
+
+Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
+ SmallVector<Value *> Ops;
+ for (const SCEV *Op : S->operands())
+ Ops.emplace_back(expand(Op));
+
+ Value *SaturationPoint =
+ MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType());
+
+ SmallVector<Value *> OpIsZero;
+ for (Value *Op : ArrayRef<Value *>(Ops).drop_back())
+ OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint));
+
+ Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero);
+
+ Value *NaiveUMin = expandUMinExpr(S);
+ return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin);
+}
+
Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
Instruction *IP, bool Root) {
setInsertPoint(IP);
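The new visitSequentialUMinExpr guards the plain umin reduction with an explicit select: if any operand before the last equals the saturation point (zero for umin), the result is forced to zero. Numerically the guard is a no-op; it exists so that poison in later operands cannot leak through once an earlier operand has already saturated. A scalar model for three operands:

    #include <algorithm>
    #include <cstdint>

    uint64_t uminSeq3(uint64_t A, uint64_t B, uint64_t C) {
      // All operands except the last are compared against the saturation point.
      bool AnyOpIsZero = (A == 0) || (B == 0);
      uint64_t NaiveUMin = std::min({A, B, C});
      return AnyOpIsZero ? 0 : NaiveUMin; // the CreateSelect in the patch
    }
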
@@ -1809,8 +1843,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
// instruction.
Instruction *Tmp;
if (Inst->getType()->isIntegerTy())
- Tmp =
- cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
+ Tmp = cast<Instruction>(Builder.CreateIntToPtr(
+ Inst, Inst->getType()->getPointerTo(), "tmp.lcssa.user"));
else {
assert(Inst->getType()->isPointerTy());
Tmp = cast<Instruction>(Builder.CreatePtrToInt(
@@ -1947,22 +1981,14 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (VO.second) {
if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- Type *Ety = Vty->getPointerElementType();
int64_t Offset = VO.second->getSExtValue();
- int64_t ESize = SE.getTypeSizeInBits(Ety);
- if ((Offset * 8) % ESize == 0) {
- ConstantInt *Idx =
- ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
- V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
- } else {
- ConstantInt *Idx =
- ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
- }
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -Offset);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
} else {
V = Builder.CreateSub(V, VO.second);
}
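With the typed-GEP special case gone, expand() always materializes the offset subtraction as a byte-wise GEP and casts back. A minimal sketch of that single remaining path, assuming the LLVM C++ API (the offset-constant type is illustrative; the real code reuses the type of the recorded offset):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Compute V - Offset (in bytes) over an i8* view of V, then recover the
    // original pointer type.
    Value *subtractByteOffset(IRBuilder<> &Builder, Value *V, int64_t Offset) {
      auto *VTy = cast<PointerType>(V->getType());
      Value *Idx = Builder.getInt64(-Offset);
      V = Builder.CreateBitCast(V, Builder.getInt8PtrTy(VTy->getAddressSpace()));
      V = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
      return Builder.CreateBitCast(V, VTy);
    }
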
@@ -2271,10 +2297,27 @@ template<typename T> static InstructionCost costAndCollectOperands(
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
- case scUMinExpr: {
+ case scUMinExpr:
+ case scSequentialUMinExpr: {
// FIXME: should this ask the cost for Intrinsic's?
+ // The reduction tree.
Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
+ switch (S->getSCEVType()) {
+ case scSequentialUMinExpr: {
+ // The safety net against poison.
+ // FIXME: this is broken.
+ Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
+ Cost += ArithCost(Instruction::Or,
+ S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
+ Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
+ break;
+ }
+ default:
+ assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
+ "Unhandled SCEV expression type?");
+ break;
+ }
break;
}
case scAddRecExpr: {
@@ -2362,7 +2405,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
case scConstant: {
// Only evalulate the costs of constants when optimizing for size.
if (CostKind != TargetTransformInfo::TCK_CodeSize)
- return 0;
+ return false;
const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
Type *Ty = S->getType();
Cost += TTI.getIntImmCostInst(
@@ -2399,7 +2442,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
- case scSMinExpr: {
+ case scSMinExpr:
+ case scSequentialUMinExpr: {
assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
"Nary expr should have more than 1 operand.");
// The simple nary expr will require one less op (or pair of ops)
@@ -2490,49 +2534,73 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
- // Get the backedge taken count and truncate or extended to the AR type.
- Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
-
// Compute |Step| * Backedge
- Value *MulV, *OfMul;
- if (Step->isOne()) {
- // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
- // needed, there is never an overflow, so to avoid artificially inflating
- // the cost of the check, directly emit the optimized IR.
- MulV = TruncTripCount;
- OfMul = ConstantInt::getFalse(MulV->getContext());
- } else {
- auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
- Intrinsic::umul_with_overflow, Ty);
- CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
- MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
- OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
- }
-
// Compute:
- // Start + |Step| * Backedge < Start
- // Start - |Step| * Backedge > Start
- Value *Add = nullptr, *Sub = nullptr;
- if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
- StartValue = InsertNoopCastOfTo(
- StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
- Value *NegMulV = Builder.CreateNeg(MulV);
- Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
- Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
- } else {
- Add = Builder.CreateAdd(StartValue, MulV);
- Sub = Builder.CreateSub(StartValue, MulV);
- }
-
- Value *EndCompareGT = Builder.CreateICmp(
- Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+ // 1. Start + |Step| * Backedge < Start
+ // 2. Start - |Step| * Backedge > Start
+ //
+ // And select either 1. or 2. depending on whether step is positive or
+ // negative. If Step is known to be positive or negative, only create
+ // either 1. or 2.
+ auto ComputeEndCheck = [&]() -> Value * {
+ // Checking <u 0 is always false.
+ if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
+ return ConstantInt::getFalse(Loc->getContext());
+
+ // Get the backedge taken count and truncate or extended to the AR type.
+ Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
+
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+ // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
+ // needed, there is never an overflow, so to avoid artificially inflating
+ // the cost of the check, directly emit the optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul =
+ Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
- Value *EndCompareLT = Builder.CreateICmp(
- Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+ Value *Add = nullptr, *Sub = nullptr;
+ bool NeedPosCheck = !SE.isKnownNegative(Step);
+ bool NeedNegCheck = !SE.isKnownPositive(Step);
+
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
+ StartValue = InsertNoopCastOfTo(
+ StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ Value *NegMulV = Builder.CreateNeg(MulV);
+ if (NeedPosCheck)
+ Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
+ } else {
+ if (NeedPosCheck)
+ Add = Builder.CreateAdd(StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
- // Select the answer based on the sign of Step.
- Value *EndCheck =
- Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
+ Value *EndCompareLT = nullptr;
+ Value *EndCompareGT = nullptr;
+ Value *EndCheck = nullptr;
+ if (NeedPosCheck)
+ EndCheck = EndCompareLT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+ if (NeedNegCheck)
+ EndCheck = EndCompareGT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+ if (NeedPosCheck && NeedNegCheck) {
+ // Select the answer based on the sign of Step.
+ EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
+ }
+ return Builder.CreateOr(EndCheck, OfMul);
+ };
+ Value *EndCheck = ComputeEndCheck();
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are
@@ -2548,7 +2616,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
}
- return Builder.CreateOr(EndCheck, OfMul);
+ return EndCheck;
}
Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
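generateOverflowCheck now emits only the compares that the sign of Step actually requires, and folds the whole check to false when it is statically trivial. A scalar model of the unsigned 64-bit case; the real expander also handles pointer ARs and signed compares, and selects between both compares when the sign of Step is unknown (__builtin_mul_overflow is a GCC/Clang builtin standing in for umul.with.overflow):

    #include <cstdint>

    bool addRecEndMayWrap(uint64_t Start, int64_t Step, uint64_t BTC) {
      uint64_t AbsStep = Step < 0 ? 0 - uint64_t(Step) : uint64_t(Step);
      uint64_t MulV;
      bool OfMul = __builtin_mul_overflow(AbsStep, BTC, &MulV);
      bool EndCheck = Step < 0 ? (Start - MulV) > Start  // NeedNegCheck only
                               : (Start + MulV) < Start; // NeedPosCheck only
      return EndCheck || OfMul;
    }
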
@@ -2578,17 +2646,16 @@ Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
Instruction *IP) {
- auto *BoolType = IntegerType::get(IP->getContext(), 1);
- Value *Check = ConstantInt::getNullValue(BoolType);
-
// Loop over all checks in this set.
+ SmallVector<Value *> Checks;
for (auto Pred : Union->getPredicates()) {
- auto *NextCheck = expandCodeForPredicate(Pred, IP);
+ Checks.push_back(expandCodeForPredicate(Pred, IP));
Builder.SetInsertPoint(IP);
- Check = Builder.CreateOr(Check, NextCheck);
}
- return Check;
+ if (Checks.empty())
+ return ConstantInt::getFalse(IP->getContext());
+ return Builder.CreateOr(Checks);
}
Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) {
@@ -2720,13 +2787,8 @@ void SCEVExpanderCleaner::cleanup() {
// Remove sets with value handles.
Expander.clear();
- // Sort so that earlier instructions do not dominate later instructions.
- stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) {
- return DT.dominates(B, A);
- });
// Remove all inserted instructions.
- for (Instruction *I : InsertedInstructions) {
-
+ for (Instruction *I : reverse(InsertedInstructions)) {
#ifndef NDEBUG
assert(all_of(I->users(),
[&InsertedSet](Value *U) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 1046998c26de..335ac03ccb52 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2052,109 +2052,119 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
if (ScanIdx == 0)
return false;
- // Okay, we *could* sink last ScanIdx instructions. But how many can we
- // actually sink before encountering instruction that is unprofitable to sink?
- auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
- unsigned NumPHIdValues = 0;
- for (auto *I : *LRI)
- for (auto *V : PHIOperands[I]) {
- if (!InstructionsToSink.contains(V))
- ++NumPHIdValues;
- // FIXME: this check is overly optimistic. We may end up not sinking
- // said instruction, due to the very same profitability check.
- // See @creating_too_many_phis in sink-common-code.ll.
- }
- LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
- unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
- if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+ bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
+
+ if (!followedByDeoptOrUnreachable) {
+    // Okay, we *could* sink the last ScanIdx instructions. But how many can we
+    // actually sink before encountering an instruction that is unprofitable to
+ // sink?
+ auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
+ unsigned NumPHIdValues = 0;
+ for (auto *I : *LRI)
+ for (auto *V : PHIOperands[I]) {
+ if (!InstructionsToSink.contains(V))
+ ++NumPHIdValues;
+ // FIXME: this check is overly optimistic. We may end up not sinking
+ // said instruction, due to the very same profitability check.
+ // See @creating_too_many_phis in sink-common-code.ll.
+ }
+ LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+ if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
NumPHIInsts++;
- return NumPHIInsts <= 1;
- };
+ return NumPHIInsts <= 1;
+ };
- // We've determined that we are going to sink last ScanIdx instructions,
- // and recorded them in InstructionsToSink. Now, some instructions may be
- // unprofitable to sink. But that determination depends on the instructions
- // that we are going to sink.
-
- // First, forward scan: find the first instruction unprofitable to sink,
- // recording all the ones that are profitable to sink.
- // FIXME: would it be better, after we detect that not all are profitable.
- // to either record the profitable ones, or erase the unprofitable ones?
- // Maybe we need to choose (at runtime) the one that will touch least instrs?
- LRI.reset();
- int Idx = 0;
- SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
- while (Idx < ScanIdx) {
- if (!ProfitableToSinkInstruction(LRI)) {
- // Too many PHIs would be created.
- LLVM_DEBUG(
- dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
- break;
+ // We've determined that we are going to sink last ScanIdx instructions,
+ // and recorded them in InstructionsToSink. Now, some instructions may be
+ // unprofitable to sink. But that determination depends on the instructions
+ // that we are going to sink.
+
+ // First, forward scan: find the first instruction unprofitable to sink,
+ // recording all the ones that are profitable to sink.
+ // FIXME: would it be better, after we detect that not all are profitable.
+ // to either record the profitable ones, or erase the unprofitable ones?
+ // Maybe we need to choose (at runtime) the one that will touch least
+ // instrs?
+ LRI.reset();
+ int Idx = 0;
+ SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
+ while (Idx < ScanIdx) {
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Too many PHIs would be created.
+ LLVM_DEBUG(
+ dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ break;
+ }
+ InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
+ --LRI;
+ ++Idx;
}
- InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
- --LRI;
- ++Idx;
- }
- // If no instructions can be sunk, early-return.
- if (Idx == 0)
- return false;
+ // If no instructions can be sunk, early-return.
+ if (Idx == 0)
+ return false;
- // Did we determine that (only) some instructions are unprofitable to sink?
- if (Idx < ScanIdx) {
- // Okay, some instructions are unprofitable.
- ScanIdx = Idx;
- InstructionsToSink = InstructionsProfitableToSink;
-
- // But, that may make other instructions unprofitable, too.
- // So, do a backward scan, do any earlier instructions become unprofitable?
- assert(!ProfitableToSinkInstruction(LRI) &&
- "We already know that the last instruction is unprofitable to sink");
- ++LRI;
- --Idx;
- while (Idx >= 0) {
- // If we detect that an instruction becomes unprofitable to sink,
- // all earlier instructions won't be sunk either,
- // so preemptively keep InstructionsProfitableToSink in sync.
- // FIXME: is this the most performant approach?
- for (auto *I : *LRI)
- InstructionsProfitableToSink.erase(I);
- if (!ProfitableToSinkInstruction(LRI)) {
- // Everything starting with this instruction won't be sunk.
- ScanIdx = Idx;
- InstructionsToSink = InstructionsProfitableToSink;
- }
+ // Did we determine that (only) some instructions are unprofitable to sink?
+ if (Idx < ScanIdx) {
+ // Okay, some instructions are unprofitable.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+
+ // But, that may make other instructions unprofitable, too.
+      // So, do a backward scan: do any earlier instructions become
+ // unprofitable?
+ assert(
+ !ProfitableToSinkInstruction(LRI) &&
+ "We already know that the last instruction is unprofitable to sink");
++LRI;
--Idx;
+ while (Idx >= 0) {
+ // If we detect that an instruction becomes unprofitable to sink,
+ // all earlier instructions won't be sunk either,
+ // so preemptively keep InstructionsProfitableToSink in sync.
+ // FIXME: is this the most performant approach?
+ for (auto *I : *LRI)
+ InstructionsProfitableToSink.erase(I);
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Everything starting with this instruction won't be sunk.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+ }
+ ++LRI;
+ --Idx;
+ }
}
- }
- // If no instructions can be sunk, early-return.
- if (ScanIdx == 0)
- return false;
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+ }
bool Changed = false;
if (HaveNonUnconditionalPredecessors) {
- // It is always legal to sink common instructions from unconditional
- // predecessors. However, if not all predecessors are unconditional,
- // this transformation might be pessimizing. So as a rule of thumb,
- // don't do it unless we'd sink at least one non-speculatable instruction.
- // See https://bugs.llvm.org/show_bug.cgi?id=30244
- LRI.reset();
- int Idx = 0;
- bool Profitable = false;
- while (Idx < ScanIdx) {
- if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
- Profitable = true;
- break;
+ if (!followedByDeoptOrUnreachable) {
+ // It is always legal to sink common instructions from unconditional
+ // predecessors. However, if not all predecessors are unconditional,
+ // this transformation might be pessimizing. So as a rule of thumb,
+ // don't do it unless we'd sink at least one non-speculatable instruction.
+ // See https://bugs.llvm.org/show_bug.cgi?id=30244
+ LRI.reset();
+ int Idx = 0;
+ bool Profitable = false;
+ while (Idx < ScanIdx) {
+ if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
+ Profitable = true;
+ break;
+ }
+ --LRI;
+ ++Idx;
}
- --LRI;
- ++Idx;
+ if (!Profitable)
+ return false;
}
- if (!Profitable)
- return false;
LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
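The key behavioral change in this hunk: when the common successor is known to funnel into a deopt call or an unreachable terminator, the PHI-count and speculation profitability limits are skipped entirely, since anything sunk there is effectively cold. A simplified stand-in for what such a predicate checks (an assumption for illustration, not the exact LLVM helper):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Walk a chain of single-successor blocks and see whether it ends in an
    // `unreachable` terminator; a small budget bounds the walk.
    static bool endsInUnreachable(const BasicBlock *BB, unsigned Budget = 8) {
      while (Budget--) {
        if (isa<UnreachableInst>(BB->getTerminator()))
          return true;
        BB = BB->getSingleSuccessor();
        if (!BB)
          return false;
      }
      return false;
    }
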
@@ -4935,14 +4945,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
- unsigned Bits = Cond->getType()->getIntegerBitWidth();
KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
// We can also eliminate cases by determining that their values are outside of
// the limited range of the condition based on how many significant (non-sign)
// bits are in the condition value.
- unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
- unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
+ unsigned MaxSignificantBitsInCond =
+ ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
@@ -4973,8 +4982,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Bits - (Known.Zero | Known.One).countPopulation();
- assert(NumUnknownBits <= Bits);
+ Known.getBitWidth() - (Known.Zero | Known.One).countPopulation();
+ assert(NumUnknownBits <= Known.getBitWidth());
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
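Both the dead-case scan and this "default is unreachable" test now derive their bit counts from the same KnownBits object. As a worked example, a condition known to match 0b0000??10 has two unknown bits, so a switch with exactly four live cases leaves nothing for the default. A standalone model of the counting, assuming the known masks only set bits below BitWidth:

    #include <bitset>
    #include <cstdint>

    bool defaultIsUnreachable(uint64_t KnownZero, uint64_t KnownOne,
                              unsigned BitWidth, uint64_t NumLiveCases) {
      unsigned NumUnknownBits =
          BitWidth - unsigned(std::bitset<64>(KnownZero | KnownOne).count());
      // Guard against shift overflow, as the real code does.
      return NumUnknownBits < 64 && NumLiveCases == (1ULL << NumUnknownBits);
    }
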
@@ -5796,10 +5805,9 @@ static void reuseTableCompare(
for (auto ValuePair : Values) {
Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
ValuePair.second, CmpOp1, true);
- if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
+ if (!CaseConst || CaseConst == DefaultConst ||
+ (CaseConst != TrueConst && CaseConst != FalseConst))
return;
- assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
- "Expect true or false as compare result.");
}
// Check if the branch instruction dominates the phi node. It's a simple
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 02727a3dbf9c..e02d02a05752 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -602,7 +602,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
Align MemSetAlign =
CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
- AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0));
+ AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
copyFlags(*CI, NewCI);
@@ -2515,8 +2515,9 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
} else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
// sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
// Handle mismatched pointer types (goes away with typeless pointers?).
- V = B.CreatePointerCast(V, Dest->getType());
- Value *PtrDiff = B.CreatePtrDiff(V, Dest);
+ V = B.CreatePointerCast(V, B.getInt8PtrTy());
+ Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
+ Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
return B.CreateIntCast(PtrDiff, CI->getType(), false);
}
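The underlying transform is unchanged; only the pointer-difference plumbing moved to explicit i8 types so CreatePtrDiff measures bytes regardless of the operands' pointee types. At the C level the rewrite computes (assuming a POSIX libc, since stpcpy is POSIX rather than ISO C++):

    #include <cstring> // stpcpy

    // sprintf(dest, "%s", str) returns the number of characters written,
    // which is exactly stpcpy's end pointer minus the start.
    int sprintfPercentS(char *dest, const char *str) {
      return int(stpcpy(dest, str) - dest);
    }
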
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
index b822db938af8..8947303674ee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -398,13 +398,17 @@ Value *Mapper::mapValue(const Value *V) {
SmallVector<ValueAsMetadata *, 4> MappedArgs;
for (auto *VAM : AL->getArgs()) {
// Map both Local and Constant VAMs here; they will both ultimately
- // be mapped via mapValue (apart from constants when we have no
- // module level changes, which have an identity mapping).
+ // be mapped via mapValue. The exceptions are constants when we have no
+ // module level changes and locals when they have no existing mapped
+ // value and RF_IgnoreMissingLocals is set; these have identity
+ // mappings.
if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) {
MappedArgs.push_back(VAM);
} else if (Value *LV = mapValue(VAM->getValue())) {
MappedArgs.push_back(
LV == VAM->getValue() ? VAM : ValueAsMetadata::get(LV));
+ } else if ((Flags & RF_IgnoreMissingLocals) && isa<LocalAsMetadata>(VAM)) {
+ MappedArgs.push_back(VAM);
} else {
// If we cannot map the value, set the argument as undef.
MappedArgs.push_back(ValueAsMetadata::get(