| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-01-27 22:17:16 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-06-04 11:59:19 +0000 |
| commit | 390adc38fc112be360bd15499e5241bf4e675b6f | |
| tree | 712d68d3aa03f7aa4902ba03dcac2a56f49ae0e5 /contrib/llvm-project/llvm/lib/Transforms/Utils | |
| parent | 8a84287b0edc66fc6dede3db770d10ff41da5464 | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
21 files changed, 797 insertions, 678 deletions
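Many of the hunks below track LLVM 14 API churn rather than functional changes; the most recurring pattern is the new context-aware `AttrBuilder` constructor used in CallPromotionUtils.cpp and InlineFunction.cpp. The sketch below is a minimal illustration of that pattern for out-of-tree callers built against these headers; it is not part of the patch, and the helper name `identifyReturnAttrs` and the `NonNull` filtering are illustrative assumptions only.

```cpp
// Minimal sketch (not part of this commit): the AttrBuilder usage the
// patch migrates to. Assumes LLVM 14 headers; the helper name and the
// NonNull filtering are illustrative only.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static AttrBuilder identifyReturnAttrs(CallBase &CB) {
  // An AttrBuilder is now constructed from an LLVMContext plus an
  // AttributeSet, replacing the old (AttributeList, ReturnIndex) form.
  AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs());
  if (!AB.hasAttributes()) // emptiness query replaces AB.empty()
    return AB;

  // A fresh builder also needs the context now.
  AttrBuilder Valid(CB.getContext());
  if (AB.contains(Attribute::NonNull))
    Valid.addAttribute(Attribute::NonNull);
  return Valid;
}
```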
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index fdc914a72bfd..c734611836eb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -22,19 +22,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static bool isCString(const Value *Arg) {
-  auto Ty = Arg->getType();
-  auto PtrTy = dyn_cast<PointerType>(Ty);
-  if (!PtrTy)
-    return false;
-
-  auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
-  if (!IntTy)
-    return false;
-
-  return IntTy->getBitWidth() == 8;
-}
-
 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
@@ -176,13 +163,15 @@ static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
 
 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
                            bool IsLast) {
+  Arg = Builder.CreateBitCast(
+      Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
   auto Length = getStrlenWithNull(Builder, Arg);
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
                          bool SpecIsCString, bool IsLast) {
-  if (SpecIsCString && isCString(Arg)) {
+  if (SpecIsCString && isa<PointerType>(Arg->getType())) {
     return appendString(Builder, Desc, Arg, IsLast);
   }
   // If the format specifies a string but the argument is not, the frontend will
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 580cfd80141e..97f11ca71726 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,6 +34,7 @@ STATISTIC(NumReadNone, "Number of functions inferred as readnone");
 STATISTIC(NumInaccessibleMemOnly,
           "Number of functions inferred as inaccessiblememonly");
 STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumWriteOnly, "Number of functions inferred as writeonly");
 STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
 STATISTIC(NumInaccessibleMemOrArgMemOnly,
           "Number of functions inferred as inaccessiblemem_or_argmemonly");
@@ -71,6 +72,19 @@ static bool setOnlyReadsMemory(Function &F) {
   return true;
 }
 
+static bool setOnlyWritesMemory(Function &F) {
+  if (F.onlyWritesMemory()) // writeonly or readnone
+    return false;
+  // Turn readonly and writeonly into readnone.
+ if (F.hasFnAttribute(Attribute::ReadOnly)) { + F.removeFnAttr(Attribute::ReadOnly); + return setDoesNotAccessMemory(F); + } + ++NumWriteOnly; + F.setOnlyWritesMemory(); + return true; +} + static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; @@ -233,6 +247,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { switch (TheLibFunc) { case LibFunc_strlen: + case LibFunc_strnlen: case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); @@ -400,6 +415,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; + case LibFunc_aligned_alloc: + case LibFunc_valloc: case LibFunc_malloc: case LibFunc_vec_malloc: Changed |= setOnlyAccessesInaccessibleMemory(F); @@ -484,6 +501,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_realloc: case LibFunc_vec_realloc: + case LibFunc_reallocf: Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); @@ -492,11 +510,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); Changed |= setArgNoUndef(F, 1); return Changed; - case LibFunc_reallocf: - Changed |= setRetNoUndef(F); - Changed |= setWillReturn(F); - Changed |= setArgNoUndef(F, 1); - return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); @@ -536,13 +549,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc_aligned_alloc: - Changed |= setOnlyAccessesInaccessibleMemory(F); - Changed |= setRetAndArgsNoUndef(F); - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setWillReturn(F); - return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -569,6 +575,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_calloc: case LibFunc_vec_calloc: + Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); @@ -851,13 +858,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc_valloc: - Changed |= setOnlyAccessesInaccessibleMemory(F); - Changed |= setRetAndArgsNoUndef(F); - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setWillReturn(F); - return Changed; case LibFunc_vprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1020,12 +1020,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_memset_pattern4: case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: - Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + LLVM_FALLTHROUGH; case LibFunc_memset: Changed |= setWillReturn(F); LLVM_FALLTHROUGH; @@ -1158,7 +1156,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case 
LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: - case LibFunc_strnlen: case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanh: @@ -1171,6 +1168,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_truncl: Changed |= setDoesNotThrow(F); Changed |= setDoesNotFreeMemory(F); + Changed |= setOnlyWritesMemory(F); Changed |= setWillReturn(F); return Changed; default: diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index b2763900e154..ac3839f2a4ab 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -20,8 +20,7 @@ using namespace llvm; bool CallGraphUpdater::finalize() { if (!DeadFunctionsInComdats.empty()) { - filterDeadComdatFunctions(*DeadFunctionsInComdats.front()->getParent(), - DeadFunctionsInComdats); + filterDeadComdatFunctions(DeadFunctionsInComdats); DeadFunctions.append(DeadFunctionsInComdats.begin(), DeadFunctionsInComdats.end()); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index ebe19f1751e5..56b6e4bc46a5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -500,7 +500,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee, CB.setArgOperand(ArgNo, Cast); // Remove any incompatible attributes for the argument. - AttrBuilder ArgAttrs(CallerPAL.getParamAttrs(ArgNo)); + AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo)); ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy)); // We may have a different byval/inalloca type. @@ -518,7 +518,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee, // If the return type of the call site doesn't match that of the callee, cast // the returned value to the appropriate type. // Remove any incompatible return value attribute. - AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex); + AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs()); if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) { createRetBitCast(CB, CallSiteRetTy, RetBitCast); RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 96aff563aa9b..24cd5747c5a4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -829,39 +829,54 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, default: RetTy = Type::getInt16Ty(header->getContext()); break; } - std::vector<Type *> paramTy; + std::vector<Type *> ParamTy; + std::vector<Type *> AggParamTy; + ValueSet StructValues; // Add the types of the input values to the function's argument list for (Value *value : inputs) { LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) { + AggParamTy.push_back(value->getType()); + StructValues.insert(value); + } else + ParamTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. 
for (Value *output : outputs) { LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - if (AggregateArgs) - paramTy.push_back(output->getType()); - else - paramTy.push_back(PointerType::getUnqual(output->getType())); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { + AggParamTy.push_back(output->getType()); + StructValues.insert(output); + } else + ParamTy.push_back(PointerType::getUnqual(output->getType())); + } + + assert( + (ParamTy.size() + AggParamTy.size()) == + (inputs.size() + outputs.size()) && + "Number of scalar and aggregate params does not match inputs, outputs"); + assert(StructValues.empty() || + AggregateArgs && "Expeced StructValues only with AggregateArgs set"); + + // Concatenate scalar and aggregate params in ParamTy. + size_t NumScalarParams = ParamTy.size(); + StructType *StructTy = nullptr; + if (AggregateArgs && !AggParamTy.empty()) { + StructTy = StructType::get(M->getContext(), AggParamTy); + ParamTy.push_back(PointerType::getUnqual(StructTy)); } LLVM_DEBUG({ dbgs() << "Function type: " << *RetTy << " f("; - for (Type *i : paramTy) + for (Type *i : ParamTy) dbgs() << *i << ", "; dbgs() << ")\n"; }); - StructType *StructTy = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - StructTy = StructType::get(M->getContext(), paramTy); - paramTy.clear(); - paramTy.push_back(PointerType::getUnqual(StructTy)); - } - FunctionType *funcType = - FunctionType::get(RetTy, paramTy, - AllowVarArgs && oldFunction->isVarArg()); + FunctionType *funcType = FunctionType::get( + RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg()); std::string SuffixToUse = Suffix.empty() @@ -871,13 +886,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), oldFunction->getName() + "." + SuffixToUse, M); - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); // Inherit all of the target dependent attributes and white-listed // target independent attributes. @@ -893,53 +901,26 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else switch (Attr.getKindAsEnum()) { // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. - case Attribute::Alignment: + // here so we get a warning if new attributes are added. 
case Attribute::AllocSize: case Attribute::ArgMemOnly: case Attribute::Builtin: - case Attribute::ByVal: case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::ElementType: - case Attribute::InAlloca: - case Attribute::InReg: case Attribute::InaccessibleMemOnly: case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: case Attribute::NoBuiltin: - case Attribute::NoCapture: case Attribute::NoMerge: case Attribute::NoReturn: case Attribute::NoSync: - case Attribute::NoUndef: - case Attribute::None: - case Attribute::NonNull: - case Attribute::Preallocated: case Attribute::ReadNone: case Attribute::ReadOnly: - case Attribute::Returned: case Attribute::ReturnsTwice: - case Attribute::SExt: case Attribute::Speculatable: case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::SwiftAsync: case Attribute::WillReturn: case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::ByRef: - case Attribute::EndAttrKinds: - case Attribute::EmptyKey: - case Attribute::TombstoneKey: continue; // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: @@ -980,30 +961,62 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::MustProgress: case Attribute::NoProfile: break; + // These attributes cannot be applied to functions. + case Attribute::Alignment: + case Attribute::ByVal: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoCapture: + case Attribute::NoUndef: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::Returned: + case Attribute::SExt: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + // These are not really attributes. + case Attribute::None: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case Attribute::TombstoneKey: + llvm_unreachable("Not a function attribute"); } newFunction->addFnAttr(Attr); } newFunction->getBasicBlockList().push_back(newRootNode); - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); + // Create scalar and aggregate iterators to name all of the arguments we + // inserted. + Function::arg_iterator ScalarAI = newFunction->arg_begin(); + Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams); // Rewrite all users of the inputs in the extracted region to use the // arguments (or appropriate addressing into struct) instead. 
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) { Value *RewriteVal; - if (AggregateArgs) { + if (AggregateArgs && StructValues.contains(inputs[i])) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx); Instruction *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP, "loadgep_" + inputs[i]->getName(), TI); + ++aggIdx; } else - RewriteVal = &*AI++; + RewriteVal = &*ScalarAI++; std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User *use : Users) @@ -1013,12 +1026,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } // Set names for input and output arguments. - if (!AggregateArgs) { - AI = newFunction->arg_begin(); - for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) - AI->setName(inputs[i]->getName()); - for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) - AI->setName(outputs[i]->getName()+".out"); + if (NumScalarParams) { + ScalarAI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI) + if (!StructValues.contains(inputs[i])) + ScalarAI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI) + if (!StructValues.contains(outputs[i])) + ScalarAI->setName(outputs[i]->getName() + ".out"); } // Rewrite branches to basic blocks outside of the loop to new dummy blocks @@ -1126,7 +1141,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector<Value *> params, StructValues, ReloadOutputs, Reloads; + std::vector<Value *> params, ReloadOutputs, Reloads; + ValueSet StructValues; Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); @@ -1134,23 +1150,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, CallInst *call = nullptr; // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; + unsigned ScalarInputArgNo = 0; SmallVector<unsigned, 1> SwiftErrorArgs; for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input)) + StructValues.insert(input); else { params.push_back(input); if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); + SwiftErrorArgs.push_back(ScalarInputArgNo); } - ++ArgNo; + ++ScalarInputArgNo; } // Create allocas for the outputs + unsigned ScalarOutputArgNo = 0; for (Value *output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { + StructValues.insert(output); } else { AllocaInst *alloca = new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), @@ -1158,12 +1175,14 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); 
params.push_back(alloca); + ++ScalarOutputArgNo; } } StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + unsigned NumAggregatedInputs = 0; + if (AggregateArgs && !StructValues.empty()) { std::vector<Type *> ArgTypes; for (Value *V : StructValues) ArgTypes.push_back(V->getType()); @@ -1175,14 +1194,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, &codeReplacer->getParent()->front().front()); params.push_back(Struct); - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); + // Store aggregated inputs in the struct. + for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { + if (inputs.contains(StructValues[i])) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + NumAggregatedInputs++; + } } } @@ -1205,24 +1228,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); } - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + // Reload the outputs passed in by reference, use the struct if output is in + // the aggregate or reload from the scalar argument. + for (unsigned i = 0, e = outputs.size(), scalarIdx = 0, + aggIdx = NumAggregatedInputs; + i != e; ++i) { Value *Output = nullptr; - if (AggregateArgs) { + if (AggregateArgs && StructValues.contains(outputs[i])) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; + ++aggIdx; } else { - Output = ReloadOutputs[i]; + Output = ReloadOutputs[scalarIdx]; + ++scalarIdx; } LoadInst *load = new LoadInst(outputs[i]->getType(), Output, outputs[i]->getName() + ".reload", @@ -1304,8 +1327,13 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, // Store the arguments right after the definition of output value. // This should be proceeded after creating exit stubs to be ensure that invoke // result restore will be placed in the outlined function. 
- Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin(); + std::advance(ScalarOutputArgBegin, ScalarInputArgNo); + Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin(); + std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo); + + for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e; + ++i) { auto *OutI = dyn_cast<Instruction>(outputs[i]); if (!OutI) continue; @@ -1325,23 +1353,27 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, assert((InsertBefore->getFunction() == newFunction || Blocks.count(InsertBefore->getParent())) && "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { + if (AggregateArgs && StructValues.contains(outputs[i])) { + assert(AggOutputArgBegin != newFunction->arg_end() && + "Number of aggregate output arguments should match " + "the number of defined values"); Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(), InsertBefore); new StoreInst(outputs[i], GEP, InsertBefore); + ++aggIdx; // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. + // all the output values, we shouldn't increment AggOutputArgBegin, which + // always points to the struct argument, in this case. } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; + assert(ScalarOutputArgBegin != newFunction->arg_end() && + "Number of scalar output arguments should match " + "the number of defined values"); + new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore); + ++ScalarOutputArgBegin; } } @@ -1840,3 +1872,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, } return false; } + +void CodeExtractor::excludeArgFromAggregate(Value *Arg) { + ExcludeArgsFromAggregate.insert(Arg); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp index 91630d876fc8..e73287c060ae 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -122,129 +122,114 @@ isSimpleEnoughValueToCommit(Constant *C, return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); } -/// Return true if this constant is simple enough for us to understand. In -/// particular, if it is a cast to anything other than from one pointer type to -/// another pointer type, we punt. We basically just support direct accesses to -/// globals and GEP's of globals. This should be kept up to date with -/// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/*_odr/linkonce linkage or external globals. - return GV->hasUniqueInitializer(); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - // Handle a constantexpr gep. 
- if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0)) && - cast<GEPOperator>(CE)->isInBounds()) { - GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - if (!GV->hasUniqueInitializer()) - return false; +void Evaluator::MutableValue::clear() { + if (auto *Agg = Val.dyn_cast<MutableAggregate *>()) + delete Agg; + Val = nullptr; +} - // The first index must be zero. - ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); - if (!CI || !CI->isZero()) return false; +Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset, + const DataLayout &DL) const { + TypeSize TySize = DL.getTypeStoreSize(Ty); + const MutableValue *V = this; + while (const auto *Agg = V->Val.dyn_cast<MutableAggregate *>()) { + Type *AggTy = Agg->Ty; + Optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->uge(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return nullptr; + + V = &Agg->Elements[Index->getZExtValue()]; + } - // The remaining indices must be compile-time known integers within the - // notional bounds of the corresponding static array types. - if (!CE->isGEPWithNoNotionalOverIndexing()) - return false; + return ConstantFoldLoadFromConst(V->Val.get<Constant *>(), Ty, Offset, DL); +} - return ConstantFoldLoadThroughGEPConstantExpr( - GV->getInitializer(), CE, - cast<GEPOperator>(CE)->getResultElementType(), DL); - } else if (CE->getOpcode() == Instruction::BitCast && - isa<GlobalVariable>(CE->getOperand(0))) { - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); - } - } +bool Evaluator::MutableValue::makeMutable() { + Constant *C = Val.get<Constant *>(); + Type *Ty = C->getType(); + unsigned NumElements; + if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { + NumElements = VT->getNumElements(); + } else if (auto *AT = dyn_cast<ArrayType>(Ty)) + NumElements = AT->getNumElements(); + else if (auto *ST = dyn_cast<StructType>(Ty)) + NumElements = ST->getNumElements(); + else + return false; - return false; + MutableAggregate *MA = new MutableAggregate(Ty); + MA->Elements.reserve(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + MA->Elements.push_back(C->getAggregateElement(I)); + Val = MA; + return true; } -/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the -/// pointer's type and walk down through the initial elements to obtain -/// additional pointers to try. Returns the first non-null return value from -/// \p TryLoad, or \p nullptr if the type can't be introspected further. -static Constant * -evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, - const TargetLibraryInfo *TLI, - std::function<Constant *(Constant *)> TryLoad) { - Constant *Val; - while (!(Val = TryLoad(Ptr))) { - // If Ty is a non-opaque struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. 
- Type *Ty = cast<PointerType>(Ptr->getType())->getElementType(); - if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque()) - break; - - IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant *const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList); - Ptr = ConstantFoldConstant(Ptr, DL, TLI); +bool Evaluator::MutableValue::write(Constant *V, APInt Offset, + const DataLayout &DL) { + Type *Ty = V->getType(); + TypeSize TySize = DL.getTypeStoreSize(Ty); + MutableValue *MV = this; + while (Offset != 0 || + !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) { + if (MV->Val.is<Constant *>() && !MV->makeMutable()) + return false; + + MutableAggregate *Agg = MV->Val.get<MutableAggregate *>(); + Type *AggTy = Agg->Ty; + Optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->uge(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return false; + + MV = &Agg->Elements[Index->getZExtValue()]; } - return Val; + + Type *MVType = MV->getType(); + MV->clear(); + if (Ty->isIntegerTy() && MVType->isPointerTy()) + MV->Val = ConstantExpr::getIntToPtr(V, MVType); + else if (Ty->isPointerTy() && MVType->isIntegerTy()) + MV->Val = ConstantExpr::getPtrToInt(V, MVType); + else if (Ty != MVType) + MV->Val = ConstantExpr::getBitCast(V, MVType); + else + MV->Val = V; + return true; } -static Constant *getInitializer(Constant *C) { - auto *GV = dyn_cast<GlobalVariable>(C); - return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; +Constant *Evaluator::MutableAggregate::toConstant() const { + SmallVector<Constant *, 32> Consts; + for (const MutableValue &MV : Elements) + Consts.push_back(MV.toConstant()); + + if (auto *ST = dyn_cast<StructType>(Ty)) + return ConstantStruct::get(ST, Consts); + if (auto *AT = dyn_cast<ArrayType>(Ty)) + return ConstantArray::get(AT, Consts); + assert(isa<FixedVectorType>(Ty) && "Must be vector"); + return ConstantVector::get(Consts); } /// Return the value that would be computed by a load from P after the stores /// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { - // If this memory location has been recently stored, use the stored value: it - // is the most up-to-date. - auto TryFindMemLoc = [this](Constant *Ptr) { - return MutatedMemory.lookup(Ptr); - }; - - if (Constant *Val = TryFindMemLoc(P)) - return Val; - - // Access it. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { - if (GV->hasDefinitiveInitializer()) - return GV->getInitializer(); + APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0); + P = cast<Constant>(P->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); + auto *GV = dyn_cast<GlobalVariable>(P); + if (!GV) return nullptr; - } - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) { - switch (CE->getOpcode()) { - // Handle a constantexpr getelementptr. - case Instruction::GetElementPtr: - if (auto *I = getInitializer(CE->getOperand(0))) - return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL); - break; - // Handle a constantexpr bitcast. - case Instruction::BitCast: - // We're evaluating a load through a pointer that was bitcast to a - // different type. See if the "from" pointer has recently been stored. 
- // If it hasn't, we may still be able to find a stored pointer by - // introspecting the type. - Constant *Val = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc); - if (!Val) - Val = getInitializer(CE->getOperand(0)); - if (Val) - return ConstantFoldLoadThroughBitcast( - Val, P->getType()->getPointerElementType(), DL); - break; - } - } + auto It = MutatedMemory.find(GV); + if (It != MutatedMemory.end()) + return It->second.read(Ty, Offset, DL); - return nullptr; // don't know how to evaluate. + if (!GV->hasDefinitiveInitializer()) + return nullptr; + return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); } static Function *getFunction(Constant *C) { @@ -260,17 +245,10 @@ static Function *getFunction(Constant *C) { Function * Evaluator::getCalleeWithFormalArgs(CallBase &CB, SmallVectorImpl<Constant *> &Formals) { - auto *V = CB.getCalledOperand(); + auto *V = CB.getCalledOperand()->stripPointerCasts(); if (auto *Fn = getFunction(getVal(V))) return getFormalParams(CB, Fn, Formals) ? Fn : nullptr; - - auto *CE = dyn_cast<ConstantExpr>(V); - if (!CE || CE->getOpcode() != Instruction::BitCast || - !getFormalParams(CB, getFunction(CE->getOperand(0)), Formals)) - return nullptr; - - return dyn_cast<Function>( - ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL)); + return nullptr; } bool Evaluator::getFormalParams(CallBase &CB, Function *F, @@ -299,17 +277,13 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F, /// If call expression contains bitcast then we may need to cast /// evaluated return value to a type of the call expression. -Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr); - if (!RV || !CE || CE->getOpcode() != Instruction::BitCast) +Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) { + if (!RV || RV->getType() == ReturnType) return RV; - if (auto *FT = - dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) { - RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL); - if (!RV) - LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n"); - } + RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL); + if (!RV) + LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n"); return RV; } @@ -337,68 +311,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, Ptr = FoldedPtr; LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } - // Conservatively, avoid aggregate types. This is because we don't - // want to worry about them partially overlapping other stores. - if (!SI->getValueOperand()->getType()->isSingleValueType() || - !isSimpleEnoughPointerToCommit(Ptr, DL)) { - // If this is too complex for us to commit, reject it. - LLVM_DEBUG( - dbgs() << "Pointer is too complex for us to evaluate store."); + + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType())); + auto *GV = dyn_cast<GlobalVariable>(Ptr); + if (!GV || !GV->hasUniqueInitializer()) { + LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: " + << *Ptr << "\n"); return false; } - Constant *Val = getVal(SI->getOperand(0)); - // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. 
+ Constant *Val = getVal(SI->getOperand(0)); if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val << "\n"); return false; } - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { - if (CE->getOpcode() == Instruction::BitCast) { - LLVM_DEBUG(dbgs() - << "Attempting to resolve bitcast on constant ptr.\n"); - // If we're evaluating a store through a bitcast, then we need - // to pull the bitcast off the pointer type and push it onto the - // stored value. In order to push the bitcast onto the stored value, - // a bitcast from the pointer's element type to Val's type must be - // legal. If it's not, we can try introspecting the type to find a - // legal conversion. - - auto TryCastValTy = [&](Constant *P) -> Constant * { - // The conversion is illegal if the store is wider than the - // pointee proposed by `evaluateBitcastFromPtr`, since that would - // drop stores to other struct elements when the caller attempts to - // look through a struct's 0th element. - Type *NewTy = cast<PointerType>(P->getType())->getElementType(); - Type *STy = Val->getType(); - if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy)) - return nullptr; - - if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) { - Ptr = P; - return FV; - } - return nullptr; - }; - - Constant *NewVal = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy); - if (!NewVal) { - LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; - } - - Val = NewVal; - LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); - } - } - - MutatedMemory[Ptr] = Val; + auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer()); + if (!Res.first->second.write(Val, Offset, DL)) + return false; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), @@ -593,7 +529,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) { - InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C); + InstResult = castCallResultIfNeeded(CB.getType(), C); if (!InstResult) return false; LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " @@ -617,7 +553,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, return false; } ValueStack.pop_back(); - InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal); + InstResult = castCallResultIfNeeded(CB.getType(), RetVal); if (RetVal && !InstResult) return false; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 9bfc73e4ba6c..f8ec8c6ad426 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -66,8 +66,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, for (const Use &U : V->uses()) { const User *UR = U.getUser(); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { - GS.HasNonInstructionUser = true; - // If the result of the constantexpr isn't pointer type, then we won't // know to expect it in various places. Just reject early. 
if (!isa<PointerType>(CE->getType())) @@ -105,9 +103,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::Stored) { - const Value *Ptr = SI->getPointerOperand(); - if (isa<ConstantExpr>(Ptr)) - Ptr = Ptr->stripPointerCasts(); + const Value *Ptr = SI->getPointerOperand()->stripPointerCasts(); if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Value *StoredVal = SI->getOperand(0); @@ -174,12 +170,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, return true; // Any other non-load instruction might take address! } } else if (const Constant *C = dyn_cast<Constant>(UR)) { - GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!isSafeToDestroyConstant(C)) return true; } else { - GS.HasNonInstructionUser = true; // Otherwise must be some other user. return true; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index 997667810580..c9f872f5b7e1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1185,10 +1185,10 @@ static bool MayContainThrowingOrExitingCall(Instruction *Begin, static AttrBuilder IdentifyValidAttributes(CallBase &CB) { - AttrBuilder AB(CB.getAttributes(), AttributeList::ReturnIndex); - if (AB.empty()) + AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs()); + if (!AB.hasAttributes()) return AB; - AttrBuilder Valid; + AttrBuilder Valid(CB.getContext()); // Only allow these white listed attributes to be propagated back to the // callee. This is because other attributes may only be valid on the call // itself, i.e. attributes such as signext and zeroext. @@ -1208,7 +1208,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { return; AttrBuilder Valid = IdentifyValidAttributes(CB); - if (Valid.empty()) + if (!Valid.hasAttributes()) return; auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); @@ -1667,7 +1667,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, Module *Mod = CB.getModule(); assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function"); bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, - IsClaimRV = !IsRetainRV; + IsUnsafeClaimRV = !IsRetainRV; for (auto *RI : Returns) { Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); @@ -1694,7 +1694,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, // and erase the autoreleaseRV call. // - If retainRV is attached to the call, just erase the autoreleaseRV // call. - if (IsClaimRV) { + if (IsUnsafeClaimRV) { Builder.SetInsertPoint(II); Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_release); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp index 668626fef933..72b864dc3e48 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -339,8 +339,10 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, #ifdef EXPENSIVE_CHECKS // Verify all sub-loops are in LCSSA form already. 
- for (Loop *SubLoop: L) + for (Loop *SubLoop: L) { + (void)SubLoop; // Silence unused variable warning. assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!"); + } #endif SmallVector<BasicBlock *, 8> ExitBlocks; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index ecad79b68185..9f33d2f82732 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -492,7 +492,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, } } - if (isAllocLikeFn(I, TLI)) + if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI)) return true; if (CallInst *CI = isFreeCall(I, TLI)) @@ -2189,8 +2189,8 @@ CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { return NewCall; } -/// changeToCall - Convert the specified invoke into a normal call. -void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { +// changeToCall - Convert the specified invoke into a normal call. +CallInst *llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { CallInst *NewCall = createCallMatchingInvoke(II); NewCall->takeName(II); NewCall->insertBefore(II); @@ -2207,6 +2207,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { II->eraseFromParent(); if (DTU) DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + return NewCall; } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -3147,11 +3148,6 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) return false; // Can't do integer/elements > 128 bits. - Type *DemandedTy = ITy; - if (I->hasOneUse()) - if (auto *Trunc = dyn_cast<TruncInst>(I->user_back())) - DemandedTy = Trunc->getType(); - // Try to find all the pieces corresponding to the bswap. bool FoundRoot = false; std::map<Value *, Optional<BitPart>> BPS; @@ -3165,6 +3161,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( "Illegal bit provenance index"); // If the upper bits are zero, then attempt to perform as a truncated op. + Type *DemandedTy = ITy; if (BitProvenance.back() == BitPart::Unset) { while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) BitProvenance = BitProvenance.drop_back(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp index 69fd110dc3c2..92333408aaef 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -359,7 +359,7 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) { // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, DominatorTree &DT, + unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in @@ -370,7 +370,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; // Only try to peel innermost loops by default. - // The constraint can be relaxed by the target in TTI.getUnrollingPreferences + // The constraint can be relaxed by the target in TTI.getPeelingPreferences // or by the flag -unroll-allow-loop-nests-peeling. 
if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) return; @@ -407,8 +407,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance; // Now go through all Phis to calculate their the number of iterations they // need to become invariants. - // Start the max computation with the UP.PeelCount value set by the target - // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + // Start the max computation with the PP.PeelCount value set by the target + // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. unsigned DesiredPeelCount = TargetPeelCount; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b0c622b98d5e..9ca1f4f44b97 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -99,6 +99,17 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, #endif ); +static cl::opt<bool> +UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, + cl::desc("Verify loopinfo after unrolling"), +#ifdef EXPENSIVE_CHECKS + cl::init(true) +#else + cl::init(false) +#endif + ); + + /// Check if unrolling created a situation where we need to insert phi nodes to /// preserve LCSSA form. /// \param Blocks is a vector of basic blocks representing unrolled loop. @@ -764,6 +775,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Apply updates to the DomTree. DT = &DTU.getDomTree(); + assert(!UnrollVerifyDomtree || + DT->verify(DominatorTree::VerificationLevel::Fast)); + // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, @@ -777,6 +791,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (CompletelyUnroll) LI->erase(L); + // LoopInfo should not be valid, confirm that. + if (UnrollVerifyLoopInfo) + LI->verify(*DT); + // After complete unrolling most of the blocks should be contained in OuterL. // However, some of them might happen to be out of OuterL (e.g. if they // precede a loop exit). In this case we might need to insert PHI nodes in diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp index 93157bd87c34..95db2fe8d310 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -1567,7 +1568,9 @@ Value *llvm::addRuntimeChecks( auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); LLVMContext &Ctx = Loc->getContext(); - IRBuilder<> ChkBuilder(Loc); + IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, + Loc->getModule()->getDataLayout()); + ChkBuilder.SetInsertPoint(Loc); // Our instructions might fold to a constant. 
Value *MemoryRuntimeCheck = nullptr; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 771b7d25b0f2..f0bf625fa18e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -70,17 +71,14 @@ void LoopVersioning::versionLoop( "scev.check"); SCEVRuntimeCheck = Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator()); - auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); - - // Discard the SCEV runtime check if it is always true. - if (CI && CI->isZero()) - SCEVRuntimeCheck = nullptr; + IRBuilder<InstSimplifyFolder> Builder( + RuntimeCheckBB->getContext(), + InstSimplifyFolder(RuntimeCheckBB->getModule()->getDataLayout())); if (MemRuntimeCheck && SCEVRuntimeCheck) { - RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck, - SCEVRuntimeCheck, "lver.safe"); - if (auto *I = dyn_cast<Instruction>(RuntimeCheck)) - I->insertBefore(RuntimeCheckBB->getTerminator()); + Builder.SetInsertPoint(RuntimeCheckBB->getTerminator()); + RuntimeCheck = + Builder.CreateOr(MemRuntimeCheck, SCEVRuntimeCheck, "lver.safe"); } else RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; @@ -109,8 +107,9 @@ void LoopVersioning::versionLoop( // Insert the conditional branch based on the result of the memchecks. Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); - BranchInst::Create(NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm); + Builder.SetInsertPoint(OrigTerm); + Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader()); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. 
This is now dominated by the diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 8dc4702993c3..3d75dd57456d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -297,7 +297,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, Function *F = OrigBB->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); - Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType(); + Type *EltTy = SrcAddr->getType()->getPointerElementType(); // Create the a comparison of src and dst, based on which we jump to either // the forward-copy part of the function (if src >= dst) or the backwards-copy diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp index bb5ff59cba4b..7c9ab7f6ca2c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -178,66 +178,30 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions( } void llvm::filterDeadComdatFunctions( - Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) { - // Build a map from the comdat to the number of entries in that comdat we - // think are dead. If this fully covers the comdat group, then the entire - // group is dead. If we find another entry in the comdat group though, we'll - // have to preserve the whole group. - SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered; + SmallVectorImpl<Function *> &DeadComdatFunctions) { + SmallPtrSet<Function *, 32> MaybeDeadFunctions; + SmallPtrSet<Comdat *, 32> MaybeDeadComdats; for (Function *F : DeadComdatFunctions) { - Comdat *C = F->getComdat(); - assert(C && "Expected all input GVs to be in a comdat!"); - ComdatEntriesCovered[C] += 1; + MaybeDeadFunctions.insert(F); + if (Comdat *C = F->getComdat()) + MaybeDeadComdats.insert(C); } - auto CheckComdat = [&](Comdat &C) { - auto CI = ComdatEntriesCovered.find(&C); - if (CI == ComdatEntriesCovered.end()) - return; - - // If this could have been covered by a dead entry, just subtract one to - // account for it. - if (CI->second > 0) { - CI->second -= 1; - return; - } - - // If we've already accounted for all the entries that were dead, the - // entire comdat is alive so remove it from the map. - ComdatEntriesCovered.erase(CI); - }; - - auto CheckAllComdats = [&] { - for (Function &F : M.functions()) - if (Comdat *C = F.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalVariable &GV : M.globals()) - if (Comdat *C = GV.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalAlias &GA : M.aliases()) - if (Comdat *C = GA.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - }; - CheckAllComdats(); - - if (ComdatEntriesCovered.empty()) { - DeadComdatFunctions.clear(); - return; + // Find comdats for which all users are dead now. + SmallPtrSet<Comdat *, 32> DeadComdats; + for (Comdat *C : MaybeDeadComdats) { + auto IsUserDead = [&](GlobalObject *GO) { + auto *F = dyn_cast<Function>(GO); + return F && MaybeDeadFunctions.contains(F); + }; + if (all_of(C->getUsers(), IsUserDead)) + DeadComdats.insert(C); } - // Remove the entries that were not covering. 
- erase_if(DeadComdatFunctions, [&](GlobalValue *GV) { - return ComdatEntriesCovered.find(GV->getComdat()) == - ComdatEntriesCovered.end(); + // Only keep functions which have no comdat or a dead comdat. + erase_if(DeadComdatFunctions, [&](Function *F) { + Comdat *C = F->getComdat(); + return C && !DeadComdats.contains(C); }); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 2f2dff6b5f0b..961adf2570a7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SampleProfileInference.h" +#include "llvm/ADT/BitVector.h" #include "llvm/Support/Debug.h" #include <queue> #include <set> @@ -144,7 +145,7 @@ public: /// A cost of decreasing the entry block's count by one. static constexpr int64_t AuxCostDecEntry = 10; /// A cost of taking an unlikely jump. - static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; + static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; private: /// Check for existence of an augmenting path with a positive capacity. @@ -236,7 +237,7 @@ private: } } - /// An node in a flow network. + /// A node in a flow network. struct Node { /// The cost of the cheapest path from the source to the current node. int64_t Distance; @@ -303,13 +304,10 @@ public: rebalanceUnknownSubgraphs(); } - /// The probability for the first successor of a unknown subgraph - static constexpr double UnknownFirstSuccProbability = 0.5; - private: void joinIsolatedComponents() { // Find blocks that are reachable from the source - auto Visited = std::vector<bool>(NumBlocks(), false); + auto Visited = BitVector(NumBlocks(), false); findReachable(Func.Entry, Visited); // Iterate over all non-reachable blocks and adjust their weights @@ -334,7 +332,7 @@ private: /// Run BFS from a given block along the jumps with a positive flow and mark /// all reachable blocks. - void findReachable(uint64_t Src, std::vector<bool> &Visited) { + void findReachable(uint64_t Src, BitVector &Visited) { if (Visited[Src]) return; std::queue<uint64_t> Queue; @@ -452,44 +450,70 @@ private: uint64_t NumBlocks() const { return Func.Blocks.size(); } - /// Rebalance unknown subgraphs so as each branch splits with probabilities - /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability + /// Rebalance unknown subgraphs so that the flow is split evenly across the + /// outgoing branches of every block of the subgraph. The method iterates over + /// blocks with known weight and identifies unknown subgraphs rooted at the + /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 
void rebalanceUnknownSubgraphs() { - assert(UnknownFirstSuccProbability >= 0.0 && - UnknownFirstSuccProbability <= 1.0 && - "the share of the unknown successor should be between 0 and 1"); - // Try to find unknown subgraphs from each non-unknown block + // Try to find unknown subgraphs from each block for (uint64_t I = 0; I < Func.Blocks.size(); I++) { auto SrcBlock = &Func.Blocks[I]; - // Do not attempt to find unknown successors from a unknown or a - // zero-flow block - if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) + // Verify if rebalancing rooted at SrcBlock is feasible + if (!canRebalanceAtRoot(SrcBlock)) continue; - std::vector<FlowBlock *> UnknownSuccs; + // Find an unknown subgraphs starting at SrcBlock. Along the way, + // fill in known destinations and intermediate unknown blocks. + std::vector<FlowBlock *> UnknownBlocks; + std::vector<FlowBlock *> KnownDstBlocks; + findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); + + // Verify if rebalancing of the subgraph is feasible. If the search is + // successful, find the unique destination block (which can be null) FlowBlock *DstBlock = nullptr; - // Find a unknown subgraphs starting at block SrcBlock - if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs)) + if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, + DstBlock)) continue; - // At the moment, we do not rebalance subgraphs containing cycles among - // unknown blocks - if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs)) + + // We cannot rebalance subgraphs containing cycles among unknown blocks + if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) continue; // Rebalance the flow - rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs); + rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); } } - /// Find a unknown subgraph starting at block SrcBlock. - /// If the search is successful, the method sets DstBlock and UnknownSuccs. - bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock, - std::vector<FlowBlock *> &UnknownSuccs) { + /// Verify if rebalancing rooted at a given block is possible. + bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { + // Do not attempt to find unknown subgraphs from an unknown or a + // zero-flow block + if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) + return false; + + // Do not attempt to process subgraphs from a block w/o unknown sucessors + bool HasUnknownSuccs = false; + for (auto Jump : SrcBlock->SuccJumps) { + if (Func.Blocks[Jump->Target].UnknownWeight) { + HasUnknownSuccs = true; + break; + } + } + if (!HasUnknownSuccs) + return false; + + return true; + } + + /// Find an unknown subgraph starting at block SrcBlock. The method sets + /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 
+ void findUnknownSubgraph(const FlowBlock *SrcBlock, + std::vector<FlowBlock *> &KnownDstBlocks, + std::vector<FlowBlock *> &UnknownBlocks) { // Run BFS from SrcBlock and make sure all paths are going through unknown // blocks and end at a non-unknown DstBlock - auto Visited = std::vector<bool>(NumBlocks(), false); + auto Visited = BitVector(NumBlocks(), false); std::queue<uint64_t> Queue; - DstBlock = nullptr; Queue.push(SrcBlock->Index); Visited[SrcBlock->Index] = true; @@ -498,52 +522,105 @@ private: Queue.pop(); // Process blocks reachable from Block for (auto Jump : Block.SuccJumps) { + // If Jump can be ignored, skip it + if (ignoreJump(SrcBlock, nullptr, Jump)) + continue; + uint64_t Dst = Jump->Target; + // If Dst has been visited, skip Jump if (Visited[Dst]) continue; + // Process block Dst Visited[Dst] = true; if (!Func.Blocks[Dst].UnknownWeight) { - // If we see non-unique non-unknown block reachable from SrcBlock, - // stop processing and skip rebalancing - FlowBlock *CandidateDstBlock = &Func.Blocks[Dst]; - if (DstBlock != nullptr && DstBlock != CandidateDstBlock) - return false; - DstBlock = CandidateDstBlock; + KnownDstBlocks.push_back(&Func.Blocks[Dst]); } else { Queue.push(Dst); - UnknownSuccs.push_back(&Func.Blocks[Dst]); + UnknownBlocks.push_back(&Func.Blocks[Dst]); } } } + } + /// Verify if rebalancing of the subgraph is feasible. If the checks are + /// successful, set the unique destination block, DstBlock (can be null). + bool canRebalanceSubgraph(const FlowBlock *SrcBlock, + const std::vector<FlowBlock *> &KnownDstBlocks, + const std::vector<FlowBlock *> &UnknownBlocks, + FlowBlock *&DstBlock) { // If the list of unknown blocks is empty, we don't need rebalancing - if (UnknownSuccs.empty()) + if (UnknownBlocks.empty()) return false; - // If all reachable nodes from SrcBlock are unknown, skip rebalancing - if (DstBlock == nullptr) + + // If there are multiple known sinks, we can't rebalance + if (KnownDstBlocks.size() > 1) return false; - // If any of the unknown blocks is an exit block, skip rebalancing - for (auto Block : UnknownSuccs) { - if (Block->isExit()) + DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); + + // Verify sinks of the subgraph + for (auto Block : UnknownBlocks) { + if (Block->SuccJumps.empty()) { + // If there are multiple (known and unknown) sinks, we can't rebalance + if (DstBlock != nullptr) + return false; + continue; + } + size_t NumIgnoredJumps = 0; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + NumIgnoredJumps++; + } + // If there is a non-sink block in UnknownBlocks with all jumps ignored, + // then we can't rebalance + if (NumIgnoredJumps == Block->SuccJumps.size()) return false; } return true; } + /// Decide whether the Jump is ignored while processing an unknown subgraphs + /// rooted at basic block SrcBlock with the destination block, DstBlock. 
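// Illustrative sketch (not part of the patch): the BFS above, reduced to a
// plain adjacency-list graph. Nodes flagged as unknown are expanded and
// recorded as interior blocks; the first known node reached on a path is
// recorded as a frontier (candidate destination) and not expanded further.
// Names (Succs, Unknown, exploreUnknownSubgraph) are invented.
#include <cstddef>
#include <queue>
#include <vector>

void exploreUnknownSubgraph(const std::vector<std::vector<size_t>> &Succs,
                            const std::vector<bool> &Unknown, size_t Root,
                            std::vector<size_t> &KnownFrontier,
                            std::vector<size_t> &UnknownInterior) {
  std::vector<bool> Visited(Succs.size(), false);
  std::queue<size_t> Queue;
  Queue.push(Root);
  Visited[Root] = true;
  while (!Queue.empty()) {
    size_t N = Queue.front();
    Queue.pop();
    for (size_t S : Succs[N]) {
      if (Visited[S])
        continue;
      Visited[S] = true;
      if (!Unknown[S]) {
        KnownFrontier.push_back(S);   // known weight: stop expanding here
      } else {
        UnknownInterior.push_back(S); // unknown weight: keep expanding
        Queue.push(S);
      }
    }
  }
}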
+ bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + const FlowJump *Jump) { + // Ignore unlikely jumps with zero flow + if (Jump->IsUnlikely && Jump->Flow == 0) + return true; + + auto JumpSource = &Func.Blocks[Jump->Source]; + auto JumpTarget = &Func.Blocks[Jump->Target]; + + // Do not ignore jumps coming into DstBlock + if (DstBlock != nullptr && JumpTarget == DstBlock) + return false; + + // Ignore jumps out of SrcBlock to known blocks + if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) + return true; + + // Ignore jumps to known blocks with zero flow + if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) + return true; + + return false; + } + /// Verify if the given unknown subgraph is acyclic, and if yes, reorder - /// UnknownSuccs in the topological order (so that all jumps are "forward"). - bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, - std::vector<FlowBlock *> &UnknownSuccs) { + /// UnknownBlocks in the topological order (so that all jumps are "forward"). + bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + std::vector<FlowBlock *> &UnknownBlocks) { // Extract local in-degrees in the considered subgraph auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); - for (auto Jump : SrcBlock->SuccJumps) { - LocalInDegree[Jump->Target]++; - } - for (uint64_t I = 0; I < UnknownSuccs.size(); I++) { - for (auto Jump : UnknownSuccs[I]->SuccJumps) { + auto fillInDegree = [&](const FlowBlock *Block) { + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; LocalInDegree[Jump->Target]++; } + }; + fillInDegree(SrcBlock); + for (auto Block : UnknownBlocks) { + fillInDegree(Block); } // A loop containing SrcBlock if (LocalInDegree[SrcBlock->Index] > 0) @@ -553,15 +630,20 @@ private: std::queue<uint64_t> Queue; Queue.push(SrcBlock->Index); while (!Queue.empty()) { - auto &Block = Func.Blocks[Queue.front()]; + FlowBlock *Block = &Func.Blocks[Queue.front()]; Queue.pop(); - // Stop propagation once we reach DstBlock - if (Block.Index == DstBlock->Index) + // Stop propagation once we reach DstBlock, if any + if (DstBlock != nullptr && Block == DstBlock) break; - AcyclicOrder.push_back(&Block); + // Keep an acyclic order of unknown blocks + if (Block->UnknownWeight && Block != SrcBlock) + AcyclicOrder.push_back(Block); + // Add to the queue all successors with zero local in-degree - for (auto Jump : Block.SuccJumps) { + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; uint64_t Dst = Jump->Target; LocalInDegree[Dst]--; if (LocalInDegree[Dst] == 0) { @@ -572,42 +654,69 @@ private: // If there is a cycle in the subgraph, AcyclicOrder contains only a subset // of all blocks - if (UnknownSuccs.size() + 1 != AcyclicOrder.size()) + if (UnknownBlocks.size() != AcyclicOrder.size()) return false; - UnknownSuccs = AcyclicOrder; + UnknownBlocks = AcyclicOrder; return true; } - /// Rebalance a given subgraph. - void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, - std::vector<FlowBlock *> &UnknownSuccs) { + /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and + /// having UnknownBlocks intermediate blocks. 
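// Illustrative sketch (not part of the patch): isAcyclicSubgraph() above is
// Kahn's algorithm -- repeatedly pop nodes whose local in-degree has dropped
// to zero; if some nodes are never popped, the subgraph contains a cycle.
// Plain-STL version over an adjacency list; names are invented.
#include <cstddef>
#include <queue>
#include <vector>

bool topoOrderIfAcyclic(const std::vector<std::vector<size_t>> &Succs,
                        std::vector<size_t> &Order) {
  std::vector<size_t> InDegree(Succs.size(), 0);
  for (const auto &S : Succs)
    for (size_t T : S)
      ++InDegree[T];

  std::queue<size_t> Queue;
  for (size_t N = 0; N < Succs.size(); ++N)
    if (InDegree[N] == 0)
      Queue.push(N);

  while (!Queue.empty()) {
    size_t N = Queue.front();
    Queue.pop();
    Order.push_back(N);
    for (size_t T : Succs[N])
      if (--InDegree[T] == 0)
        Queue.push(T);
  }
  // Every node received a position iff the graph is acyclic.
  return Order.size() == Succs.size();
}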
+ void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, + const FlowBlock *DstBlock, + const std::vector<FlowBlock *> &UnknownBlocks) { assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); - assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns"); - for (auto Block : UnknownSuccs) { + // Ditribute flow from the source block + uint64_t BlockFlow = 0; + // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps + for (auto Jump : SrcBlock->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + BlockFlow += Jump->Flow; + } + rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); + + // Ditribute flow from the remaining blocks + for (auto Block : UnknownBlocks) { + assert(Block->UnknownWeight && "incorrect unknown subgraph"); + uint64_t BlockFlow = 0; // Block's flow is the sum of incoming flows - uint64_t TotalFlow = 0; - if (Block == SrcBlock) { - TotalFlow = Block->Flow; - } else { - for (auto Jump : Block->PredJumps) { - TotalFlow += Jump->Flow; - } - Block->Flow = TotalFlow; + for (auto Jump : Block->PredJumps) { + BlockFlow += Jump->Flow; } + Block->Flow = BlockFlow; + rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); + } + } - // Process all successor jumps and update corresponding flow values - for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) { - auto Jump = Block->SuccJumps[I]; - if (I + 1 == Block->SuccJumps.size()) { - Jump->Flow = TotalFlow; - continue; - } - uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability); - Jump->Flow = Flow; - TotalFlow -= Flow; - } + /// Redistribute flow for a block in a subgraph rooted at SrcBlock, + /// and ending at DstBlock. + void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + const FlowBlock *Block, uint64_t BlockFlow) { + // Process all successor jumps and update corresponding flow values + size_t BlockDegree = 0; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + BlockDegree++; + } + // If all successor jumps of the block are ignored, skip it + if (DstBlock == nullptr && BlockDegree == 0) + return; + assert(BlockDegree > 0 && "all outgoing jumps are ignored"); + + // Each of the Block's successors gets the following amount of flow. + // Rounding the value up so that all flow is propagated + uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + uint64_t Flow = std::min(SuccFlow, BlockFlow); + Jump->Flow = Flow; + BlockFlow -= Flow; } + assert(BlockFlow == 0 && "not all flow is propagated"); } /// A constant indicating an arbitrary exit block of a function. 
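// Illustrative sketch (not part of the patch): the rounding rebalanceBlock()
// uses when splitting a block's flow across its non-ignored successors.
// Each successor gets ceil(Flow / Degree), capped by what is still left, so
// the shares always sum exactly to the original flow. Name is invented.
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<uint64_t> splitFlowEvenly(uint64_t Flow, size_t Degree) {
  std::vector<uint64_t> Shares;
  if (Degree == 0)
    return Shares;
  uint64_t Share = (Flow + Degree - 1) / Degree; // round up
  for (size_t I = 0; I < Degree; ++I) {
    uint64_t S = std::min<uint64_t>(Share, Flow);
    Shares.push_back(S);
    Flow -= S;
  }
  return Shares; // e.g. splitFlowEvenly(10, 3) == {4, 4, 2}
}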
@@ -799,7 +908,7 @@ void verifyWeights(const FlowFunction &Func) { // Run BFS from the source along edges with positive flow std::queue<uint64_t> Queue; - auto Visited = std::vector<bool>(NumBlocks, false); + auto Visited = BitVector(NumBlocks, false); Queue.push(Func.Entry); Visited[Func.Entry] = true; while (!Queue.empty()) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index c840ee85795f..5363a851fc27 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -173,7 +173,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { auto *PtrTy = cast<PointerType>(Ty); if (DL.isNonIntegralPointerType(PtrTy)) { auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); - assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 && + assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 && "alloc size of i8 must by 1 byte for the GEP to be correct"); auto *GEP = Builder.CreateGEP( Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep"); @@ -471,7 +471,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // indexes into the array implied by the pointer operand; the rest of // the indices index into the element or field type selected by the // preceding index. - Type *ElTy = PTy->getElementType(); + Type *ElTy = PTy->getNonOpaquePointerElementType(); for (;;) { // If the scale size is not 0, attempt to factor out a scale for // array indexing. @@ -640,8 +640,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices, - "scevgep"); + Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(), + Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); } @@ -1671,7 +1671,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { return Builder.CreateSExt(V, Ty); } -Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { +Value *SCEVExpander::expandSMaxExpr(const SCEVNAryExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { @@ -1700,7 +1700,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { return LHS; } -Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { +Value *SCEVExpander::expandUMaxExpr(const SCEVNAryExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { @@ -1729,7 +1729,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } -Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { +Value *SCEVExpander::expandSMinExpr(const SCEVNAryExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); Type *Ty = LHS->getType(); for (int i = S->getNumOperands() - 2; i >= 0; --i) { @@ -1758,7 +1758,7 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { return LHS; } -Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { +Value *SCEVExpander::expandUMinExpr(const SCEVNAryExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); Type *Ty = LHS->getType(); for (int i = S->getNumOperands() - 2; i >= 0; --i) { @@ -1787,6 +1787,40 @@ Value 
*SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { return LHS; } +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + return expandSMaxExpr(S); +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + return expandUMaxExpr(S); +} + +Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { + return expandSMinExpr(S); +} + +Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { + return expandUMinExpr(S); +} + +Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) { + SmallVector<Value *> Ops; + for (const SCEV *Op : S->operands()) + Ops.emplace_back(expand(Op)); + + Value *SaturationPoint = + MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType()); + + SmallVector<Value *> OpIsZero; + for (Value *Op : ArrayRef<Value *>(Ops).drop_back()) + OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint)); + + Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero); + + Value *NaiveUMin = expandUMinExpr(S); + return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin); +} + Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *IP, bool Root) { setInsertPoint(IP); @@ -1809,8 +1843,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { // instruction. Instruction *Tmp; if (Inst->getType()->isIntegerTy()) - Tmp = - cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user")); + Tmp = cast<Instruction>(Builder.CreateIntToPtr( + Inst, Inst->getType()->getPointerTo(), "tmp.lcssa.user")); else { assert(Inst->getType()->isPointerTy()); Tmp = cast<Instruction>(Builder.CreatePtrToInt( @@ -1947,22 +1981,14 @@ Value *SCEVExpander::expand(const SCEV *S) { if (VO.second) { if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { - Type *Ety = Vty->getPointerElementType(); int64_t Offset = VO.second->getSExtValue(); - int64_t ESize = SE.getTypeSizeInBits(Ety); - if ((Offset * 8) % ESize == 0) { - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize); - V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); - } else { - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); - } + ConstantInt *Idx = + ConstantInt::getSigned(VO.second->getType(), -Offset); + unsigned AS = Vty->getAddressSpace(); + V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); + V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, + "uglygep"); + V = Builder.CreateBitCast(V, Vty); } else { V = Builder.CreateSub(V, VO.second); } @@ -2271,10 +2297,27 @@ template<typename T> static InstructionCost costAndCollectOperands( case scSMaxExpr: case scUMaxExpr: case scSMinExpr: - case scUMinExpr: { + case scUMinExpr: + case scSequentialUMinExpr: { // FIXME: should this ask the cost for Intrinsic's? + // The reduction tree. Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); + switch (S->getSCEVType()) { + case scSequentialUMinExpr: { + // The safety net against poison. + // FIXME: this is broken. + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0); + Cost += ArithCost(Instruction::Or, + S->getNumOperands() > 2 ? 
S->getNumOperands() - 2 : 0); + Cost += CmpSelCost(Instruction::Select, 1, 0, 1); + break; + } + default: + assert(!isa<SCEVSequentialMinMaxExpr>(S) && + "Unhandled SCEV expression type?"); + break; + } break; } case scAddRecExpr: { @@ -2362,7 +2405,7 @@ bool SCEVExpander::isHighCostExpansionHelper( case scConstant: { // Only evalulate the costs of constants when optimizing for size. if (CostKind != TargetTransformInfo::TCK_CodeSize) - return 0; + return false; const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); Type *Ty = S->getType(); Cost += TTI.getIntImmCostInst( @@ -2399,7 +2442,8 @@ bool SCEVExpander::isHighCostExpansionHelper( case scUMaxExpr: case scSMaxExpr: case scUMinExpr: - case scSMinExpr: { + case scSMinExpr: + case scSequentialUMinExpr: { assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) @@ -2490,49 +2534,73 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero); Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue); - // Get the backedge taken count and truncate or extended to the AR type. - Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); - // Compute |Step| * Backedge - Value *MulV, *OfMul; - if (Step->isOne()) { - // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't - // needed, there is never an overflow, so to avoid artificially inflating - // the cost of the check, directly emit the optimized IR. - MulV = TruncTripCount; - OfMul = ConstantInt::getFalse(MulV->getContext()); - } else { - auto *MulF = Intrinsic::getDeclaration(Loc->getModule(), - Intrinsic::umul_with_overflow, Ty); - CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul"); - MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); - OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); - } - // Compute: - // Start + |Step| * Backedge < Start - // Start - |Step| * Backedge > Start - Value *Add = nullptr, *Sub = nullptr; - if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) { - StartValue = InsertNoopCastOfTo( - StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace())); - Value *NegMulV = Builder.CreateNeg(MulV); - Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); - Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); - } else { - Add = Builder.CreateAdd(StartValue, MulV); - Sub = Builder.CreateSub(StartValue, MulV); - } - - Value *EndCompareGT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); + // 1. Start + |Step| * Backedge < Start + // 2. Start - |Step| * Backedge > Start + // + // And select either 1. or 2. depending on whether step is positive or + // negative. If Step is known to be positive or negative, only create + // either 1. or 2. + auto ComputeEndCheck = [&]() -> Value * { + // Checking <u 0 is always false. + if (!Signed && Start->isZero() && SE.isKnownPositive(Step)) + return ConstantInt::getFalse(Loc->getContext()); + + // Get the backedge taken count and truncate or extended to the AR type. + Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); + + Value *MulV, *OfMul; + if (Step->isOne()) { + // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't + // needed, there is never an overflow, so to avoid artificially inflating + // the cost of the check, directly emit the optimized IR. 
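// Illustrative sketch (not part of the patch): the arithmetic behind the end
// check assembled here, modelled for the unsigned (Signed == false) case in
// plain C++. The recurrence wraps iff |Step| * BTC overflows, or stepping
// Start by that product moves past Start in the wrong direction. When Step
// is 1 the multiply can never overflow, which is why the code above skips
// umul_with_overflow for that case. addRecWrapsUnsigned is an invented name
// and __builtin_mul_overflow is a GCC/Clang builtin.
#include <cstdint>

bool addRecWrapsUnsigned(uint64_t Start, int64_t Step, uint64_t BTC) {
  uint64_t AbsStep = Step < 0 ? 0 - static_cast<uint64_t>(Step)
                              : static_cast<uint64_t>(Step);
  uint64_t Mul = 0;
  bool OfMul = __builtin_mul_overflow(AbsStep, BTC, &Mul);
  bool Wraps = Step >= 0 ? (Start + Mul < Start)  // increasing recurrence
                         : (Start - Mul > Start); // decreasing recurrence
  return OfMul || Wraps;
}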
+ MulV = TruncTripCount; + OfMul = ConstantInt::getFalse(MulV->getContext()); + } else { + auto *MulF = Intrinsic::getDeclaration(Loc->getModule(), + Intrinsic::umul_with_overflow, Ty); + CallInst *Mul = + Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul"); + MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); + OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); + } - Value *EndCompareLT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); + Value *Add = nullptr, *Sub = nullptr; + bool NeedPosCheck = !SE.isKnownNegative(Step); + bool NeedNegCheck = !SE.isKnownPositive(Step); + + if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) { + StartValue = InsertNoopCastOfTo( + StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace())); + Value *NegMulV = Builder.CreateNeg(MulV); + if (NeedPosCheck) + Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); + } else { + if (NeedPosCheck) + Add = Builder.CreateAdd(StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateSub(StartValue, MulV); + } - // Select the answer based on the sign of Step. - Value *EndCheck = - Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); + Value *EndCompareLT = nullptr; + Value *EndCompareGT = nullptr; + Value *EndCheck = nullptr; + if (NeedPosCheck) + EndCheck = EndCompareLT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); + if (NeedNegCheck) + EndCheck = EndCompareGT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); + if (NeedPosCheck && NeedNegCheck) { + // Select the answer based on the sign of Step. + EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); + } + return Builder.CreateOr(EndCheck, OfMul); + }; + Value *EndCheck = ComputeEndCheck(); // If the backedge taken count type is larger than the AR type, // check that we don't drop any bits by truncating it. If we are @@ -2548,7 +2616,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); } - return Builder.CreateOr(EndCheck, OfMul); + return EndCheck; } Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, @@ -2578,17 +2646,16 @@ Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, Instruction *IP) { - auto *BoolType = IntegerType::get(IP->getContext(), 1); - Value *Check = ConstantInt::getNullValue(BoolType); - // Loop over all checks in this set. + SmallVector<Value *> Checks; for (auto Pred : Union->getPredicates()) { - auto *NextCheck = expandCodeForPredicate(Pred, IP); + Checks.push_back(expandCodeForPredicate(Pred, IP)); Builder.SetInsertPoint(IP); - Check = Builder.CreateOr(Check, NextCheck); } - return Check; + if (Checks.empty()) + return ConstantInt::getFalse(IP->getContext()); + return Builder.CreateOr(Checks); } Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { @@ -2720,13 +2787,8 @@ void SCEVExpanderCleaner::cleanup() { // Remove sets with value handles. Expander.clear(); - // Sort so that earlier instructions do not dominate later instructions. - stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) { - return DT.dominates(B, A); - }); // Remove all inserted instructions. 
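// Illustrative sketch (not part of the patch): why visiting the inserted
// instructions in reverse is sufficient for the cleanup below. An expander
// only creates an instruction after the values it uses, so reverse creation
// order erases every user before its operands and each erase sees no
// remaining uses. Toy model with the same invariant; all names are invented.
#include <cassert>
#include <memory>
#include <utility>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // may only reference earlier-created nodes
  int NumUses = 0;
};

Node *createNode(std::vector<std::unique_ptr<Node>> &Created,
                 std::vector<Node *> Operands) {
  auto N = std::make_unique<Node>();
  N->Operands = std::move(Operands);
  for (Node *Op : N->Operands)
    ++Op->NumUses;
  Created.push_back(std::move(N));
  return Created.back().get();
}

void eraseAllInReverseCreationOrder(
    std::vector<std::unique_ptr<Node>> &Created) {
  while (!Created.empty()) {
    Node *Last = Created.back().get();
    assert(Last->NumUses == 0 && "any user would have been erased already");
    for (Node *Op : Last->Operands)
      --Op->NumUses; // drop the use before deleting the user
    Created.pop_back();
  }
}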
- for (Instruction *I : InsertedInstructions) { - + for (Instruction *I : reverse(InsertedInstructions)) { #ifndef NDEBUG assert(all_of(I->users(), [&InsertedSet](Value *U) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1046998c26de..335ac03ccb52 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2052,109 +2052,119 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, if (ScanIdx == 0) return false; - // Okay, we *could* sink last ScanIdx instructions. But how many can we - // actually sink before encountering instruction that is unprofitable to sink? - auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { - unsigned NumPHIdValues = 0; - for (auto *I : *LRI) - for (auto *V : PHIOperands[I]) { - if (!InstructionsToSink.contains(V)) - ++NumPHIdValues; - // FIXME: this check is overly optimistic. We may end up not sinking - // said instruction, due to the very same profitability check. - // See @creating_too_many_phis in sink-common-code.ll. - } - LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); - unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); - if ((NumPHIdValues % UnconditionalPreds.size()) != 0) + bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB); + + if (!followedByDeoptOrUnreachable) { + // Okay, we *could* sink last ScanIdx instructions. But how many can we + // actually sink before encountering instruction that is unprofitable to + // sink? + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { + unsigned NumPHIdValues = 0; + for (auto *I : *LRI) + for (auto *V : PHIOperands[I]) { + if (!InstructionsToSink.contains(V)) + ++NumPHIdValues; + // FIXME: this check is overly optimistic. We may end up not sinking + // said instruction, due to the very same profitability check. + // See @creating_too_many_phis in sink-common-code.ll. + } + LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); + unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); + if ((NumPHIdValues % UnconditionalPreds.size()) != 0) NumPHIInsts++; - return NumPHIInsts <= 1; - }; + return NumPHIInsts <= 1; + }; - // We've determined that we are going to sink last ScanIdx instructions, - // and recorded them in InstructionsToSink. Now, some instructions may be - // unprofitable to sink. But that determination depends on the instructions - // that we are going to sink. - - // First, forward scan: find the first instruction unprofitable to sink, - // recording all the ones that are profitable to sink. - // FIXME: would it be better, after we detect that not all are profitable. - // to either record the profitable ones, or erase the unprofitable ones? - // Maybe we need to choose (at runtime) the one that will touch least instrs? - LRI.reset(); - int Idx = 0; - SmallPtrSet<Value *, 4> InstructionsProfitableToSink; - while (Idx < ScanIdx) { - if (!ProfitableToSinkInstruction(LRI)) { - // Too many PHIs would be created. - LLVM_DEBUG( - dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); - break; + // We've determined that we are going to sink last ScanIdx instructions, + // and recorded them in InstructionsToSink. Now, some instructions may be + // unprofitable to sink. But that determination depends on the instructions + // that we are going to sink. 
+ + // First, forward scan: find the first instruction unprofitable to sink, + // recording all the ones that are profitable to sink. + // FIXME: would it be better, after we detect that not all are profitable. + // to either record the profitable ones, or erase the unprofitable ones? + // Maybe we need to choose (at runtime) the one that will touch least + // instrs? + LRI.reset(); + int Idx = 0; + SmallPtrSet<Value *, 4> InstructionsProfitableToSink; + while (Idx < ScanIdx) { + if (!ProfitableToSinkInstruction(LRI)) { + // Too many PHIs would be created. + LLVM_DEBUG( + dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); + break; + } + InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end()); + --LRI; + ++Idx; } - InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end()); - --LRI; - ++Idx; - } - // If no instructions can be sunk, early-return. - if (Idx == 0) - return false; + // If no instructions can be sunk, early-return. + if (Idx == 0) + return false; - // Did we determine that (only) some instructions are unprofitable to sink? - if (Idx < ScanIdx) { - // Okay, some instructions are unprofitable. - ScanIdx = Idx; - InstructionsToSink = InstructionsProfitableToSink; - - // But, that may make other instructions unprofitable, too. - // So, do a backward scan, do any earlier instructions become unprofitable? - assert(!ProfitableToSinkInstruction(LRI) && - "We already know that the last instruction is unprofitable to sink"); - ++LRI; - --Idx; - while (Idx >= 0) { - // If we detect that an instruction becomes unprofitable to sink, - // all earlier instructions won't be sunk either, - // so preemptively keep InstructionsProfitableToSink in sync. - // FIXME: is this the most performant approach? - for (auto *I : *LRI) - InstructionsProfitableToSink.erase(I); - if (!ProfitableToSinkInstruction(LRI)) { - // Everything starting with this instruction won't be sunk. - ScanIdx = Idx; - InstructionsToSink = InstructionsProfitableToSink; - } + // Did we determine that (only) some instructions are unprofitable to sink? + if (Idx < ScanIdx) { + // Okay, some instructions are unprofitable. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + + // But, that may make other instructions unprofitable, too. + // So, do a backward scan, do any earlier instructions become + // unprofitable? + assert( + !ProfitableToSinkInstruction(LRI) && + "We already know that the last instruction is unprofitable to sink"); ++LRI; --Idx; + while (Idx >= 0) { + // If we detect that an instruction becomes unprofitable to sink, + // all earlier instructions won't be sunk either, + // so preemptively keep InstructionsProfitableToSink in sync. + // FIXME: is this the most performant approach? + for (auto *I : *LRI) + InstructionsProfitableToSink.erase(I); + if (!ProfitableToSinkInstruction(LRI)) { + // Everything starting with this instruction won't be sunk. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + } + ++LRI; + --Idx; + } } - } - // If no instructions can be sunk, early-return. - if (ScanIdx == 0) - return false; + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + } bool Changed = false; if (HaveNonUnconditionalPredecessors) { - // It is always legal to sink common instructions from unconditional - // predecessors. However, if not all predecessors are unconditional, - // this transformation might be pessimizing. 
So as a rule of thumb, - // don't do it unless we'd sink at least one non-speculatable instruction. - // See https://bugs.llvm.org/show_bug.cgi?id=30244 - LRI.reset(); - int Idx = 0; - bool Profitable = false; - while (Idx < ScanIdx) { - if (!isSafeToSpeculativelyExecute((*LRI)[0])) { - Profitable = true; - break; + if (!followedByDeoptOrUnreachable) { + // It is always legal to sink common instructions from unconditional + // predecessors. However, if not all predecessors are unconditional, + // this transformation might be pessimizing. So as a rule of thumb, + // don't do it unless we'd sink at least one non-speculatable instruction. + // See https://bugs.llvm.org/show_bug.cgi?id=30244 + LRI.reset(); + int Idx = 0; + bool Profitable = false; + while (Idx < ScanIdx) { + if (!isSafeToSpeculativelyExecute((*LRI)[0])) { + Profitable = true; + break; + } + --LRI; + ++Idx; } - --LRI; - ++Idx; + if (!Profitable) + return false; } - if (!Profitable) - return false; LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. @@ -4935,14 +4945,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); - unsigned Bits = Cond->getType()->getIntegerBitWidth(); KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) // bits are in the condition value. - unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; - unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; + unsigned MaxSignificantBitsInCond = + ComputeMaxSignificantBits(Cond, DL, 0, AC, SI); // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; @@ -4973,8 +4982,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (Known.Zero | Known.One).countPopulation(); - assert(NumUnknownBits <= Bits); + Known.getBitWidth() - (Known.Zero | Known.One).countPopulation(); + assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { @@ -5796,10 +5805,9 @@ static void reuseTableCompare( for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), ValuePair.second, CmpOp1, true); - if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst)) + if (!CaseConst || CaseConst == DefaultConst || + (CaseConst != TrueConst && CaseConst != FalseConst)) return; - assert((CaseConst == TrueConst || CaseConst == FalseConst) && - "Expect true or false as compare result."); } // Check if the branch instruction dominates the phi node. 
It's a simple diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 02727a3dbf9c..e02d02a05752 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -602,7 +602,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { Align MemSetAlign = CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne(); CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign); - AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0)); + AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0)); NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( CI->getContext(), 0, ArgAttrs)); copyFlags(*CI, NewCI); @@ -2515,8 +2515,9 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) { // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest // Handle mismatched pointer types (goes away with typeless pointers?). - V = B.CreatePointerCast(V, Dest->getType()); - Value *PtrDiff = B.CreatePtrDiff(V, Dest); + V = B.CreatePointerCast(V, B.getInt8PtrTy()); + Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy()); + Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest); return B.CreateIntCast(PtrDiff, CI->getType(), false); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index b822db938af8..8947303674ee 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -398,13 +398,17 @@ Value *Mapper::mapValue(const Value *V) { SmallVector<ValueAsMetadata *, 4> MappedArgs; for (auto *VAM : AL->getArgs()) { // Map both Local and Constant VAMs here; they will both ultimately - // be mapped via mapValue (apart from constants when we have no - // module level changes, which have an identity mapping). + // be mapped via mapValue. The exceptions are constants when we have no + // module level changes and locals when they have no existing mapped + // value and RF_IgnoreMissingLocals is set; these have identity + // mappings. if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) { MappedArgs.push_back(VAM); } else if (Value *LV = mapValue(VAM->getValue())) { MappedArgs.push_back( LV == VAM->getValue() ? VAM : ValueAsMetadata::get(LV)); + } else if ((Flags & RF_IgnoreMissingLocals) && isa<LocalAsMetadata>(VAM)) { + MappedArgs.push_back(VAM); } else { // If we cannot map the value, set the argument as undef. MappedArgs.push_back(ValueAsMetadata::get( |
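// Illustrative sketch (not part of the patch): the rewrite performed by
// optimizeSPrintFString above, expressed as ordinary code. sprintf(dest,
// "%s", str) returns the number of characters copied, which equals the
// byte-wise pointer difference between stpcpy's return value (the trailing
// NUL) and dest -- the i8*-based difference the updated CreatePtrDiff call
// computes. sprintfPercentS is an invented name; stpcpy is POSIX.
#include <cstring>

int sprintfPercentS(char *Dest, const char *Str) {
  char *End = stpcpy(Dest, Str);       // points at the terminating '\0'
  return static_cast<int>(End - Dest); // the value sprintf would return
}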
