Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp  7
-rw-r--r--  lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h  7
-rw-r--r--  lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp  7
-rw-r--r--  lib/Transforms/Coroutines/CoroCleanup.cpp  9
-rw-r--r--  lib/Transforms/Coroutines/CoroEarly.cpp  11
-rw-r--r--  lib/Transforms/Coroutines/CoroElide.cpp  7
-rw-r--r--  lib/Transforms/Coroutines/CoroFrame.cpp  62
-rw-r--r--  lib/Transforms/Coroutines/CoroInstr.h  7
-rw-r--r--  lib/Transforms/Coroutines/CoroInternal.h  7
-rw-r--r--  lib/Transforms/Coroutines/CoroSplit.cpp  30
-rw-r--r--  lib/Transforms/Coroutines/Coroutines.cpp  15
-rw-r--r--  lib/Transforms/IPO/AlwaysInliner.cpp  41
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp  93
-rw-r--r--  lib/Transforms/IPO/Attributor.cpp  1690
-rw-r--r--  lib/Transforms/IPO/BarrierNoopPass.cpp  7
-rw-r--r--  lib/Transforms/IPO/BlockExtractor.cpp  122
-rw-r--r--  lib/Transforms/IPO/CalledValuePropagation.cpp  7
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp  29
-rw-r--r--  lib/Transforms/IPO/CrossDSOCFI.cpp  17
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  9
-rw-r--r--  lib/Transforms/IPO/ElimAvailExtern.cpp  7
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp  7
-rw-r--r--  lib/Transforms/IPO/ForceFunctionAttrs.cpp  8
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp  73
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp  57
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp  7
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp  144
-rw-r--r--  lib/Transforms/IPO/GlobalSplit.cpp  7
-rw-r--r--  lib/Transforms/IPO/HotColdSplitting.cpp  424
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp  50
-rw-r--r--  lib/Transforms/IPO/IPO.cpp  8
-rw-r--r--  lib/Transforms/IPO/InferFunctionAttrs.cpp  9
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp  13
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp  19
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp  30
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp  14
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp  41
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp  70
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp  75
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp  206
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp  18
-rw-r--r--  lib/Transforms/IPO/SCCP.cpp  1
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp  66
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp  7
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp  7
-rw-r--r--  lib/Transforms/IPO/SyntheticCountsPropagation.cpp  7
-rw-r--r--  lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp  50
-rw-r--r--  lib/Transforms/IPO/WholeProgramDevirt.cpp  53
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp  193
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  120
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp  159
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp  1158
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp  90
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp  643
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h  101
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  62
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp  103
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp  15
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp  288
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp  98
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  112
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp  321
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp  348
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp  775
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp  12
-rw-r--r--  lib/Transforms/Instrumentation/CFGMST.h  16
-rw-r--r--  lib/Transforms/Instrumentation/CGProfile.cpp  7
-rw-r--r--  lib/Transforms/Instrumentation/ControlHeightReduction.cpp  70
-rw-r--r--  lib/Transforms/Instrumentation/DataFlowSanitizer.cpp  171
-rw-r--r--  lib/Transforms/Instrumentation/EfficiencySanitizer.cpp  900
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp  154
-rw-r--r--  lib/Transforms/Instrumentation/HWAddressSanitizer.cpp  594
-rw-r--r--  lib/Transforms/Instrumentation/IndirectCallPromotion.cpp  21
-rw-r--r--  lib/Transforms/Instrumentation/InstrOrderFile.cpp  211
-rw-r--r--  lib/Transforms/Instrumentation/InstrProfiling.cpp  213
-rw-r--r--  lib/Transforms/Instrumentation/Instrumentation.cpp  25
-rw-r--r--  lib/Transforms/Instrumentation/MaximumSpanningTree.h  10
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp  395
-rw-r--r--  lib/Transforms/Instrumentation/PGOInstrumentation.cpp  445
-rw-r--r--  lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp  9
-rw-r--r--  lib/Transforms/Instrumentation/PoisonChecking.cpp  357
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp  199
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp  141
-rw-r--r--  lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h  31
-rw-r--r--  lib/Transforms/ObjCARC/BlotMapVector.h  7
-rw-r--r--  lib/Transforms/ObjCARC/DependencyAnalysis.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/DependencyAnalysis.h  7
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARC.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARC.h  7
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCAPElim.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCContract.cpp  251
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCExpand.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp  80
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysis.h  7
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/PtrState.cpp  7
-rw-r--r--  lib/Transforms/ObjCARC/PtrState.h  7
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp  20
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp  7
-rw-r--r--  lib/Transforms/Scalar/BDCE.cpp  17
-rw-r--r--  lib/Transforms/Scalar/CallSiteSplitting.cpp  12
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp  40
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp  7
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  305
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp  7
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp  102
-rw-r--r--  lib/Transforms/Scalar/DivRemPairs.cpp  7
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp  239
-rw-r--r--  lib/Transforms/Scalar/FlattenCFGPass.cpp  7
-rw-r--r--  lib/Transforms/Scalar/Float2Int.cpp  29
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp  104
-rw-r--r--  lib/Transforms/Scalar/GVNHoist.cpp  9
-rw-r--r--  lib/Transforms/Scalar/GVNSink.cpp  22
-rw-r--r--  lib/Transforms/Scalar/GuardWidening.cpp  212
-rw-r--r--  lib/Transforms/Scalar/IVUsersPrinter.cpp  7
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp  716
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp  101
-rw-r--r--  lib/Transforms/Scalar/InferAddressSpaces.cpp  53
-rw-r--r--  lib/Transforms/Scalar/InstSimplifyPass.cpp  7
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp  93
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp  466
-rw-r--r--  lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp  7
-rw-r--r--  lib/Transforms/Scalar/LoopDataPrefetch.cpp  11
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp  7
-rw-r--r--  lib/Transforms/Scalar/LoopDistribute.cpp  22
-rw-r--r--  lib/Transforms/Scalar/LoopFuse.cpp  1215
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp  94
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp  9
-rw-r--r--  lib/Transforms/Scalar/LoopInterchange.cpp  130
-rw-r--r--  lib/Transforms/Scalar/LoopLoadElimination.cpp  62
-rw-r--r--  lib/Transforms/Scalar/LoopPassManager.cpp  7
-rw-r--r--  lib/Transforms/Scalar/LoopPredication.cpp  524
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp  17
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp  12
-rw-r--r--  lib/Transforms/Scalar/LoopSimplifyCFG.cpp  237
-rw-r--r--  lib/Transforms/Scalar/LoopSink.cpp  14
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  344
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp  10
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp  97
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp  26
-rw-r--r--  lib/Transforms/Scalar/LoopVersioningLICM.cpp  17
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp  17
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp  7
-rw-r--r--  lib/Transforms/Scalar/LowerGuardIntrinsic.cpp  7
-rw-r--r--  lib/Transforms/Scalar/LowerWidenableCondition.cpp  85
-rw-r--r--  lib/Transforms/Scalar/MakeGuardsExplicit.cpp  7
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp  24
-rw-r--r--  lib/Transforms/Scalar/MergeICmps.cpp  728
-rw-r--r--  lib/Transforms/Scalar/MergedLoadStoreMotion.cpp  7
-rw-r--r--  lib/Transforms/Scalar/NaryReassociate.cpp  11
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp  61
-rw-r--r--  lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp  7
-rw-r--r--  lib/Transforms/Scalar/PlaceSafepoints.cpp  42
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp  103
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp  7
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  254
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp  76
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp  215
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp  11
-rw-r--r--  lib/Transforms/Scalar/Scalarizer.cpp  70
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  7
-rw-r--r--  lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  130
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp  7
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp  7
-rw-r--r--  lib/Transforms/Scalar/SpeculateAroundPHIs.cpp  15
-rw-r--r--  lib/Transforms/Scalar/SpeculativeExecution.cpp  8
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  15
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp  47
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp  13
-rw-r--r--  lib/Transforms/Scalar/WarnMissedTransforms.cpp  9
-rw-r--r--  lib/Transforms/Utils/ASanStackFrameLayout.cpp  9
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp  11
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp  141
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp  34
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp  339
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp  7
-rw-r--r--  lib/Transforms/Utils/CallPromotionUtils.cpp  21
-rw-r--r--  lib/Transforms/Utils/CanonicalizeAliases.cpp  7
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp  37
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp  7
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp  496
-rw-r--r--  lib/Transforms/Utils/CtorUtils.cpp  7
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp  16
-rw-r--r--  lib/Transforms/Utils/EntryExitInstrumenter.cpp  11
-rw-r--r--  lib/Transforms/Utils/EscapeEnumerator.cpp  13
-rw-r--r--  lib/Transforms/Utils/Evaluator.cpp  113
-rw-r--r--  lib/Transforms/Utils/FlattenCFG.cpp  7
-rw-r--r--  lib/Transforms/Utils/FunctionComparator.cpp  53
-rw-r--r--  lib/Transforms/Utils/FunctionImportUtils.cpp  44
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp  7
-rw-r--r--  lib/Transforms/Utils/GuardUtils.cpp  7
-rw-r--r--  lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp  7
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp  172
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp  7
-rw-r--r--  lib/Transforms/Utils/IntegerDivision.cpp  7
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp  47
-rw-r--r--  lib/Transforms/Utils/LibCallsShrinkWrap.cpp  7
-rw-r--r--  lib/Transforms/Utils/Local.cpp  387
-rw-r--r--  lib/Transforms/Utils/LoopRotationUtils.cpp  26
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp  126
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp  424
-rw-r--r--  lib/Transforms/Utils/LoopUnrollAndJam.cpp  17
-rw-r--r--  lib/Transforms/Utils/LoopUnrollPeel.cpp  210
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp  61
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp  106
-rw-r--r--  lib/Transforms/Utils/LoopVersioning.cpp  12
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp  10
-rw-r--r--  lib/Transforms/Utils/LowerMemIntrinsics.cpp  29
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp  218
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp  7
-rw-r--r--  lib/Transforms/Utils/MetaRenamer.cpp  7
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp  88
-rw-r--r--  lib/Transforms/Utils/NameAnonGlobals.cpp  7
-rw-r--r--  lib/Transforms/Utils/PredicateInfo.cpp  18
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp  66
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp  10
-rw-r--r--  lib/Transforms/Utils/SSAUpdaterBulk.cpp  7
-rw-r--r--  lib/Transforms/Utils/SanitizerStats.cpp  15
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp  203
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp  200
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp  510
-rw-r--r--  lib/Transforms/Utils/SizeOpts.cpp  37
-rw-r--r--  lib/Transforms/Utils/SplitModule.cpp  7
-rw-r--r--  lib/Transforms/Utils/StripGCRelocates.cpp  7
-rw-r--r--  lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp  7
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp  7
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp  7
-rw-r--r--  lib/Transforms/Utils/Utils.cpp  10
-rw-r--r--  lib/Transforms/Utils/VNCoercion.cpp  66
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp  22
-rw-r--r--  lib/Transforms/Vectorize/LoadStoreVectorizer.cpp  21
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorizationLegality.cpp  347
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorizationPlanner.h  23
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp  476
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp  1362
-rw-r--r--  lib/Transforms/Vectorize/VPRecipeBuilder.h  14
-rw-r--r--  lib/Transforms/Vectorize/VPlan.cpp  23
-rw-r--r--  lib/Transforms/Vectorize/VPlan.h  60
-rw-r--r--  lib/Transforms/Vectorize/VPlanDominatorTree.h  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp  11
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGBuilder.h  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGTransforms.h  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanLoopInfo.h  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanPredicator.cpp  248
-rw-r--r--  lib/Transforms/Vectorize/VPlanPredicator.h  74
-rw-r--r--  lib/Transforms/Vectorize/VPlanSLP.cpp  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanValue.h  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanVerifier.cpp  7
-rw-r--r--  lib/Transforms/Vectorize/VPlanVerifier.h  7
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp  7
252 files changed, 17948 insertions, 9441 deletions
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c795866ec0f2..06222d7e7e44 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1,9 +1,8 @@
//===- AggressiveInstCombine.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index f3c8bde9f8ff..44e1c45664e7 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -1,9 +1,8 @@
//===- AggressiveInstCombineInternal.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 8289b2d68f8a..7c5767912fd3 100644
--- a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -1,9 +1,8 @@
//===- TruncInstCombine.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 359876627fce..1fb0a114d0c7 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -1,9 +1,8 @@
//===- CoroCleanup.cpp - Coroutine Cleanup Pass ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass lowers all remaining coroutine intrinsics.
@@ -50,7 +49,7 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
Builder.SetInsertPoint(SubFn);
auto *FramePtr = Builder.CreateBitCast(FrameRaw, FramePtrTy);
auto *Gep = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
- auto *Load = Builder.CreateLoad(Gep);
+ auto *Load = Builder.CreateLoad(FrameTy->getElementType(Index), Gep);
SubFn->replaceAllUsesWith(Load);
}
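
The hunk above is one instance of this commit's switch to the IRBuilder::CreateLoad overload that names the loaded type explicitly instead of deriving it from the pointer operand. A minimal self-contained sketch of that pattern (the helper name and includes are mine, not part of the commit):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Load field `Index` of a coroutine-frame struct. The element type is passed
// to CreateLoad explicitly; it is no longer inferred from `Gep`'s pointer type.
static llvm::Value *loadFrameField(llvm::IRBuilder<> &Builder,
                                   llvm::StructType *FrameTy,
                                   llvm::Value *FramePtr, unsigned Index) {
  llvm::Value *Gep =
      Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
  return Builder.CreateLoad(FrameTy->getElementType(Index), Gep);
}
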
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index ac47a06281a5..692697d6f32e 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -1,9 +1,8 @@
//===- CoroEarly.cpp - Coroutine Early Function Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass lowers coroutine intrinsics that hide the details of the exact
@@ -98,7 +97,7 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) {
Builder.SetInsertPoint(II);
auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0);
- auto *Load = Builder.CreateLoad(Gep);
+ auto *Load = Builder.CreateLoad(FrameTy, Gep);
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
II->replaceAllUsesWith(Cond);
@@ -114,7 +113,7 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
StructType *FrameTy = StructType::create(C, "NoopCoro.Frame");
auto *FramePtrTy = FrameTy->getPointerTo();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
FrameTy->setBody({FnPtrTy, FnPtrTy});
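
Aside from the cosmetic rename of the comment to /*isVarArg=*/, the surrounding code builds NoopCoro.Frame as a struct of two pointers to `void(Frame*)` functions. A self-contained sketch of that construction, restating only what the hunk shows (the helper name is assumed):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// Create the struct opaquely first so its own pointer type can appear in the
// resume/destroy function signatures, then fill in the two-slot body.
static llvm::StructType *buildNoopFrameTy(llvm::LLVMContext &C) {
  llvm::StructType *FrameTy = llvm::StructType::create(C, "NoopCoro.Frame");
  llvm::Type *FramePtrTy = FrameTy->getPointerTo();
  llvm::FunctionType *FnTy = llvm::FunctionType::get(
      llvm::Type::getVoidTy(C), {FramePtrTy}, /*isVarArg=*/false);
  llvm::Type *FnPtrTy = FnTy->getPointerTo();
  FrameTy->setBody({FnPtrTy, FnPtrTy});
  return FrameTy;
}
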
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 58f952b54f3a..6707aa1c827d 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -1,9 +1,8 @@
//===- CoroElide.cpp - Coroutine Frame Allocation Elision Pass ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass replaces dynamic allocation of coroutine frame with alloca and
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 4cb0a52961cc..58bf22bee29b 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1,9 +1,8 @@
//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
@@ -53,7 +52,7 @@ public:
}
size_t blockToIndex(BasicBlock *BB) const {
- auto *I = std::lower_bound(V.begin(), V.end(), BB);
+ auto *I = llvm::lower_bound(V, BB);
assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
return I - V.begin();
}
@@ -379,7 +378,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
StructType *FrameTy = StructType::create(C, Name);
auto *FramePtrTy = FrameTy->getPointerTo();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
// Figure out how wide should be an integer type storing the suspend index.
@@ -403,6 +402,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
if (CurrentDef == Shape.PromiseAlloca)
continue;
+ uint64_t Count = 1;
Type *Ty = nullptr;
if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
Ty = AI->getAllocatedType();
@@ -414,11 +414,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
Padder.addType(PaddingTy);
}
}
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+ Count = CI->getValue().getZExtValue();
+ else
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
} else {
Ty = CurrentDef->getType();
}
S.setFieldIndex(Types.size());
- Types.push_back(Ty);
+ if (Count == 1)
+ Types.push_back(Ty);
+ else
+ Types.push_back(ArrayType::get(Ty, Count));
Padder.addType(Ty);
}
FrameTy->setBody(Types);
@@ -471,11 +478,12 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
//
static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
+ LLVMContext &C = CB->getContext();
IRBuilder<> Builder(CB->getNextNode());
- PointerType *FramePtrTy = Shape.FrameTy->getPointerTo();
+ StructType *FrameTy = Shape.FrameTy;
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
auto *FramePtr =
cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
- Type *FrameTy = FramePtrTy->getElementType();
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
@@ -492,17 +500,41 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
if (Shape.PromiseAlloca)
Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);
+ // Create a GEP with the given index into the coroutine frame for the original
+ // value Orig. Appends an extra 0 index for array-allocas, preserving the
+ // original type.
+ auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
+ SmallVector<Value *, 3> Indices = {
+ ConstantInt::get(Type::getInt32Ty(C), 0),
+ ConstantInt::get(Type::getInt32Ty(C), Index),
+ };
+
+ if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ auto Count = CI->getValue().getZExtValue();
+ if (Count > 1) {
+ Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+ }
+ } else {
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
+ }
+ }
+
+ return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
+ };
+
// Create a load instruction to reload the spilled value from the coroutine
// frame.
auto CreateReload = [&](Instruction *InsertBefore) {
assert(Index && "accessing unassigned field number");
Builder.SetInsertPoint(InsertBefore);
- auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
- CurrentValue->getName() +
- Twine(".reload.addr"));
+
+ auto *G = GetFramePointer(Index, CurrentValue);
+ G->setName(CurrentValue->getName() + Twine(".reload.addr"));
+
return isa<AllocaInst>(CurrentValue)
? G
- : Builder.CreateLoad(G,
+ : Builder.CreateLoad(FrameTy->getElementType(Index), G,
CurrentValue->getName() + Twine(".reload"));
};
@@ -589,8 +621,8 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
// If we found any allocas, replace all of their remaining uses with Geps.
for (auto &P : Allocas) {
- auto *G =
- Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
+ auto *G = GetFramePointer(P.second, P.first);
+
// We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here,
// as we are changing location of the instruction.
G->takeName(P.first);
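
The hunks above teach the frame builder about allocas with a constant array size: the spilled field becomes [N x T], and the replacement address gets one extra 0 index so its type is T* again. A sketch of what the new GetFramePointer lambda computes, pulled out into a hypothetical free function (names assumed, non-constant sizes are rejected earlier with report_fatal_error):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// For `alloca T, i64 N` spilled into frame field `Index` (of type [N x T]
// when N > 1), append a trailing 0 index so the GEP again yields a T*.
static llvm::Value *frameAddressFor(llvm::IRBuilder<> &Builder,
                                    llvm::StructType *FrameTy,
                                    llvm::Value *FramePtr, unsigned Index,
                                    llvm::AllocaInst *AI) {
  llvm::SmallVector<llvm::Value *, 3> Indices = {Builder.getInt32(0),
                                                 Builder.getInt32(Index)};
  auto *CI = llvm::cast<llvm::ConstantInt>(AI->getArraySize());
  if (CI->getValue().getZExtValue() > 1)
    Indices.push_back(Builder.getInt32(0)); // step from [N x T] to element 0
  return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
}
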
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 9a8cc5a2591c..5e19d7642e38 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -1,9 +1,8 @@
//===-- CoroInstr.h - Coroutine Intrinsics Instruction Wrappers -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines classes that make it really easy to deal with intrinsic
diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h
index 8e690d649cf5..441c8a20f1f3 100644
--- a/lib/Transforms/Coroutines/CoroInternal.h
+++ b/lib/Transforms/Coroutines/CoroInternal.h
@@ -1,9 +1,8 @@
//===- CoroInternal.h - Internal Coroutine interfaces ---------*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Common definitions/declarations used internally by coroutine lowering passes.
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 9eeceb217ba8..5458e70ff16a 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1,9 +1,8 @@
//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
@@ -94,7 +93,7 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
auto *FrameTy = Shape.FrameTy;
auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
- auto *Index = Builder.CreateLoad(GepIndex, "index");
+ auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
auto *Switch =
Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
Shape.ResumeSwitch = Switch;
@@ -230,7 +229,8 @@ static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
Builder.SetInsertPoint(OldSwitchBB->getTerminator());
auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr,
0, 0, "ResumeFn.addr");
- auto *Load = Builder.CreateLoad(GepIndex);
+ auto *Load = Builder.CreateLoad(
+ Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex);
auto *NullPtr =
ConstantPointerNull::get(cast<PointerType>(Load->getType()));
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
@@ -777,6 +777,8 @@ static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) {
}
static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+ EliminateUnreachableBlocks(F);
+
coro::Shape Shape(F);
if (!Shape.CoroBegin)
return;
@@ -828,6 +830,7 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
// split.
static void prepareForSplit(Function &F, CallGraph &CG) {
Module &M = *F.getParent();
+ LLVMContext &Context = F.getContext();
#ifndef NDEBUG
Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
assert(DevirtFn && "coro.devirt.trigger function not found");
@@ -842,10 +845,12 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
// call void %1(i8* null)
coro::LowererBase Lowerer(M);
Instruction *InsertPt = F.getEntryBlock().getTerminator();
- auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext()));
+ auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
auto *DevirtFnAddr =
Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
- auto *IndirectCall = CallInst::Create(DevirtFnAddr, Null, "", InsertPt);
+ FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
+ {Type::getInt8PtrTy(Context)}, false);
+ auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
// Update CG graph with an indirect call we just added.
CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
@@ -861,7 +866,7 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
LLVMContext &C = M.getContext();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
Function *DevirtFn =
Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
CORO_DEVIRT_TRIGGER_FN, &M);
@@ -941,7 +946,12 @@ struct CoroSplit : public CallGraphSCCPass {
char CoroSplit::ID = 0;
-INITIALIZE_PASS(
+INITIALIZE_PASS_BEGIN(
+ CoroSplit, "coro-split",
+ "Split coroutine into a set of functions driving its state machine", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(
CoroSplit, "coro-split",
"Split coroutine into a set of functions driving its state machine", false,
false)
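
The prepareForSplit hunk shows the other recurring migration in this commit: CallInst::Create for an indirect call now takes the callee's FunctionType explicitly rather than recovering it from the callee pointer's type. Roughly, as a hypothetical standalone helper with the same signature as used above:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

// Emit `call void %Callee(i8* %Arg)` before InsertPt, spelling out the
// callee's function type up front.
static llvm::CallInst *emitIndirectCall(llvm::Value *Callee, llvm::Value *Arg,
                                        llvm::Instruction *InsertPt) {
  llvm::LLVMContext &C = InsertPt->getContext();
  llvm::FunctionType *FnTy = llvm::FunctionType::get(
      llvm::Type::getVoidTy(C), {llvm::Type::getInt8PtrTy(C)},
      /*isVarArg=*/false);
  return llvm::CallInst::Create(FnTy, Callee, {Arg}, "", InsertPt);
}
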
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index cf84f916e24b..a581d1d21169 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -1,9 +1,8 @@
//===- Coroutines.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -177,15 +176,15 @@ static void buildCGN(CallGraph &CG, CallGraphNode *Node) {
// Look for calls by this function.
for (Instruction &I : instructions(F))
- if (CallSite CS = CallSite(cast<Value>(&I))) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CG.getCallsExternalNode());
+ Node->addCalledFunction(Call, CG.getCallsExternalNode());
else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, CG.getOrInsertFunction(Callee));
+ Node->addCalledFunction(Call, CG.getOrInsertFunction(Callee));
}
}
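
buildCGN above is a small instance of the CallSite-to-CallBase migration: the instruction is matched with dyn_cast<CallBase>, and the resulting pointer is handed to the call-graph API directly. A minimal sketch of the idiom (the counting helper is mine, not this function):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"

// Count direct calls in F via CallBase, which covers call, invoke and callbr
// without going through the old CallSite wrapper.
static unsigned countDirectCalls(llvm::Function &F) {
  unsigned N = 0;
  for (llvm::Instruction &I : llvm::instructions(F))
    if (auto *Call = llvm::dyn_cast<llvm::CallBase>(&I))
      if (Call->getCalledFunction()) // null for indirect calls
        ++N;
  return N;
}
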
diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp
index 07138718ce2c..c50805692b98 100644
--- a/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -1,9 +1,8 @@
//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,8 +31,17 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
-PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) {
- InlineFunctionInfo IFI;
+PreservedAnalyses AlwaysInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ // Add inline assumptions during code generation.
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+
SmallSetVector<CallSite, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
@@ -146,11 +154,20 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
// Only inline direct calls to functions with always-inline attributes
- // that are viable for inlining. FIXME: We shouldn't even get here for
- // declarations.
- if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
- return InlineCost::getAlways("always inliner");
+ // that are viable for inlining.
+ if (!Callee)
+ return InlineCost::getNever("indirect call");
+
+ // FIXME: We shouldn't even get here for declarations.
+ if (Callee->isDeclaration())
+ return InlineCost::getNever("no definition");
+
+ if (!CS.hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever("no alwaysinline attribute");
+
+ auto IsViable = isInlineViable(*Callee);
+ if (!IsViable)
+ return InlineCost::getNever(IsViable.message);
- return InlineCost::getNever("always inliner");
+ return InlineCost::getAlways("always inliner");
}
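
The rewritten getInlineCost above returns a distinct reason string for each bailout instead of a single generic "always inliner" never-cost. Condensed to the two checks the hunk makes explicit (a hypothetical standalone helper; the real code also consults isInlineViable and rejects declarations):

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"

// Classify a call site the way the new early returns do, attaching a reason
// to each InlineCost so remarks can say why inlining was refused.
static llvm::InlineCost classifyForAlwaysInline(llvm::CallSite CS) {
  llvm::Function *Callee = CS.getCalledFunction();
  if (!Callee)
    return llvm::InlineCost::getNever("indirect call");
  if (!CS.hasFnAttr(llvm::Attribute::AlwaysInline))
    return llvm::InlineCost::getNever("no alwaysinline attribute");
  return llvm::InlineCost::getAlways("always inliner");
}
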
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4663de0b049e..95a9f31cced3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -1,9 +1,8 @@
//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,11 +58,13 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -243,6 +244,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttributeList &CallPAL = CS.getAttributes();
+ IRBuilder<NoFolder> IRB(Call);
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -261,10 +263,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ auto *Idx =
+ IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
// TODO: Tell AA about the new values?
- Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+ Args.push_back(IRB.CreateLoad(STy->getElementType(i), Idx,
+ Idx->getName() + ".val"));
ArgAttrVec.push_back(AttributeSet());
}
} else if (!I->use_empty()) {
@@ -294,13 +297,13 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
- V->getName() + ".idx", Call);
+ V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
Ops.clear();
}
// Since we're replacing a load make sure we take the alignment
// of the previous load.
- LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+ LoadInst *newLoad =
+ IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
newLoad->setAlignment(OrigLoad->getAlignment());
// Transfer the AA info too.
AAMDNodes AAInfo;
@@ -476,9 +479,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
return NF;
}
-/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
-static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
+/// Return true if we can prove that all callees pass in a valid pointer for the
+/// specified function argument.
+static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
@@ -490,7 +493,7 @@ static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL))
return false;
}
return true;
@@ -563,8 +566,8 @@ static void markIndicesSafe(const IndicesVector &ToMark,
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
- AAResults &AAR, unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
+ unsigned MaxElements) {
using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments
@@ -586,9 +589,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
//
// This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller.
- //
- // This optimization is also safe for InAlloca parameters, because it verifies
- // that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote.
@@ -596,9 +596,28 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
+
+ if (ByValTy)
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+ // Whenever a new underlying type for the operand is found, make sure it's
+ // consistent with the GEPs and loads we've already seen and, if necessary,
+ // use it to see if all incoming pointers are valid (which implies the 0-index
+ // is safe).
+ Type *BaseTy = ByValTy;
+ auto UpdateBaseTy = [&](Type *NewBaseTy) {
+ if (BaseTy)
+ return BaseTy == NewBaseTy;
+
+ BaseTy = NewBaseTy;
+ if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
+ assert(SafeToUnconditionallyLoad.empty());
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+ }
+
+ return true;
+ };
+
// First, iterate the entry block and mark loads of (geps of) arguments as
// safe.
BasicBlock &EntryBlock = Arg->getParent()->front();
@@ -621,6 +640,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// right away, can't promote this argument at all.
return false;
+ if (!UpdateBaseTy(GEP->getSourceElementType()))
+ return false;
+
// Indices checked out, mark them as safe
markIndicesSafe(Indices, SafeToUnconditionallyLoad);
Indices.clear();
@@ -628,6 +650,11 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
} else if (V == Arg) {
// Direct loads are equivalent to a GEP with a single 0 index.
markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+
+ if (BaseTy && LI->getType() != BaseTy)
+ return false;
+
+ BaseTy = LI->getType();
}
}
@@ -645,6 +672,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
+
+ if (!UpdateBaseTy(LI->getType()))
+ return false;
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
@@ -653,10 +683,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
- MaxElements);
+ return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
}
+ if (!UpdateBaseTy(GEP->getSourceElementType()))
+ return false;
+
// Ensure that all of the indices are constants.
for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
++i)
@@ -853,6 +885,11 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (F->isVarArg())
return nullptr;
+ // Don't transform functions that receive inallocas, as the transformation may
+ // not be safe depending on calling convention.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument *, 16> PointerArgs;
for (Argument &I : F->args())
@@ -911,8 +948,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
+ // packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote =
PtrArg->hasByValAttr() &&
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
@@ -963,8 +999,9 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
- MaxElements))
+ Type *ByValTy =
+ PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
+ if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
ArgsToPromote.insert(PtrArg);
}
@@ -1101,7 +1138,9 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
CallGraphNode *NewCalleeNode =
CG.getOrInsertFunction(NewCS.getCalledFunction());
CallGraphNode *CallerNode = CG[Caller];
- CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ CallerNode->replaceCallEdge(*cast<CallBase>(OldCS.getInstruction()),
+ *cast<CallBase>(NewCS.getInstruction()),
+ NewCalleeNode);
};
const TargetTransformInfo &TTI =
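
The doPromotion hunks replace hand-built GetElementPtrInst/LoadInst instructions with an IRBuilder parameterized by NoFolder, and the loads again name their result type. A sketch of that call-site pattern (helper name is mine; the NoFolder choice presumably keeps the new GEPs as real, nameable instructions even when every operand is constant):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"

// Materialize the address of field i of *Ptr and its load right before Call,
// mirroring the scalar arguments the promoted callee now expects.
static llvm::Value *loadStructFieldAt(llvm::Instruction *Call,
                                      llvm::Value *Ptr, llvm::StructType *STy,
                                      unsigned i) {
  llvm::IRBuilder<llvm::NoFolder> IRB(Call);
  llvm::Value *Idx =
      IRB.CreateStructGEP(STy, Ptr, i, Ptr->getName() + ".idx");
  return IRB.CreateLoad(STy->getElementType(i), Idx, Idx->getName() + ".val");
}
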
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
new file mode 100644
index 000000000000..2a52c6b9b4ad
--- /dev/null
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -0,0 +1,1690 @@
+//===- Attributor.cpp - Module-wide attribute deduction -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interprocedural pass that deduces and/or propagates
+// attributes. This is done in an abstract interpretation style fixpoint
+// iteration. See the Attributor.h file comment and the class descriptions in
+// that file for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "attributor"
+
+STATISTIC(NumFnWithExactDefinition,
+ "Number of function with exact definitions");
+STATISTIC(NumFnWithoutExactDefinition,
+ "Number of function without exact definitions");
+STATISTIC(NumAttributesTimedOut,
+ "Number of abstract attributes timed out before fixpoint");
+STATISTIC(NumAttributesValidFixpoint,
+ "Number of abstract attributes in a valid fixpoint state");
+STATISTIC(NumAttributesManifested,
+ "Number of abstract attributes manifested in IR");
+STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind");
+
+STATISTIC(NumFnUniqueReturned, "Number of function with unique return");
+STATISTIC(NumFnKnownReturns, "Number of function with known return values");
+STATISTIC(NumFnArgumentReturned,
+ "Number of function arguments marked returned");
+STATISTIC(NumFnNoSync, "Number of functions marked nosync");
+STATISTIC(NumFnNoFree, "Number of functions marked nofree");
+STATISTIC(NumFnReturnedNonNull,
+ "Number of function return values marked nonnull");
+STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
+STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
+STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
+
+// TODO: Determine a good default value.
+//
+// In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads
+// (when run with the first 5 abstract attributes). The results also indicate
+// that we never reach 32 iterations but always find a fixpoint sooner.
+//
+// This will become more evolved once we perform two interleaved fixpoint
+// iterations: bottom-up and top-down.
+static cl::opt<unsigned>
+ MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
+ cl::desc("Maximal number of fixpoint iterations."),
+ cl::init(32));
+
+static cl::opt<bool> DisableAttributor(
+ "attributor-disable", cl::Hidden,
+ cl::desc("Disable the attributor inter-procedural deduction pass."),
+ cl::init(true));
+
+static cl::opt<bool> VerifyAttributor(
+ "attributor-verify", cl::Hidden,
+ cl::desc("Verify the Attributor deduction and "
+ "manifestation of attributes -- may issue false-positive errors"),
+ cl::init(false));
+
+/// Logic operators for the change status enum class.
+///
+///{
+ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) {
+ return l == ChangeStatus::CHANGED ? l : r;
+}
+ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
+ return l == ChangeStatus::UNCHANGED ? l : r;
+}
+///}
+
+/// Helper to adjust the statistics.
+static void bookkeeping(AbstractAttribute::ManifestPosition MP,
+ const Attribute &Attr) {
+ if (!AreStatisticsEnabled())
+ return;
+
+ if (!Attr.isEnumAttribute())
+ return;
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::NoUnwind:
+ NumFnNoUnwind++;
+ return;
+ case Attribute::Returned:
+ NumFnArgumentReturned++;
+ return;
+ case Attribute::NoSync:
+ NumFnNoSync++;
+ break;
+ case Attribute::NoFree:
+ NumFnNoFree++;
+ break;
+ case Attribute::NonNull:
+ switch (MP) {
+ case AbstractAttribute::MP_RETURNED:
+ NumFnReturnedNonNull++;
+ break;
+ case AbstractAttribute::MP_ARGUMENT:
+ NumFnArgumentNonNull++;
+ break;
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ NumCSArgumentNonNull++;
+ break;
+ default:
+ break;
+ }
+ break;
+ case Attribute::WillReturn:
+ NumFnWillReturn++;
+ break;
+ default:
+ return;
+ }
+}
+
+template <typename StateTy>
+using followValueCB_t = std::function<bool(Value *, StateTy &State)>;
+template <typename StateTy>
+using visitValueCB_t = std::function<void(Value *, StateTy &State)>;
+
+/// Recursively visit all values that might become \p InitV at some point. This
+/// will be done by looking through cast instructions, selects, phis, and calls
+/// with the "returned" attribute. The callback \p FollowValueCB is asked before
+/// a potential origin value is looked at. If no \p FollowValueCB is passed, a
+/// default one is used that will make sure we visit every value only once. Once
+/// we cannot look through the value any further, the callback \p VisitValueCB
+/// is invoked and passed the current value and the \p State. To limit how much
+/// effort is invested, we will never visit more than \p MaxValues values.
+template <typename StateTy>
+static bool genericValueTraversal(
+ Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB,
+ followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) {
+
+ SmallPtrSet<Value *, 16> Visited;
+ followValueCB_t<bool> DefaultFollowValueCB = [&](Value *Val, bool &) {
+ return Visited.insert(Val).second;
+ };
+
+ if (!FollowValueCB)
+ FollowValueCB = &DefaultFollowValueCB;
+
+ SmallVector<Value *, 16> Worklist;
+ Worklist.push_back(InitV);
+
+ int Iteration = 0;
+ do {
+ Value *V = Worklist.pop_back_val();
+
+ // Check if we should process the current value. To prevent endless
+ // recursion keep a record of the values we followed!
+ if (!(*FollowValueCB)(V, State))
+ continue;
+
+ // Make sure we limit the compile time for complex expressions.
+ if (Iteration++ >= MaxValues)
+ return false;
+
+ // Explicitly look through calls with a "returned" attribute if we do
+ // not have a pointer as stripPointerCasts only works on them.
+ if (V->getType()->isPointerTy()) {
+ V = V->stripPointerCasts();
+ } else {
+ CallSite CS(V);
+ if (CS && CS.getCalledFunction()) {
+ Value *NewV = nullptr;
+ for (Argument &Arg : CS.getCalledFunction()->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CS.getArgOperand(Arg.getArgNo());
+ break;
+ }
+ if (NewV) {
+ Worklist.push_back(NewV);
+ continue;
+ }
+ }
+ }
+
+ // Look through select instructions, visit both potential values.
+ if (auto *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // Look through phi nodes, visit all operands.
+ if (auto *PHI = dyn_cast<PHINode>(V)) {
+ Worklist.append(PHI->op_begin(), PHI->op_end());
+ continue;
+ }
+
+ // Once a leaf is reached we inform the user through the callback.
+ VisitValueCB(V, State);
+ } while (!Worklist.empty());
+
+ // All values have been visited.
+ return true;
+}
+
+/// Helper to identify the correct offset into an attribute list.
+static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
+ unsigned ArgNo = 0) {
+ switch (MP) {
+ case AbstractAttribute::MP_ARGUMENT:
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ return ArgNo + AttributeList::FirstArgIndex;
+ case AbstractAttribute::MP_FUNCTION:
+ return AttributeList::FunctionIndex;
+ case AbstractAttribute::MP_RETURNED:
+ return AttributeList::ReturnIndex;
+ }
+ llvm_unreachable("Unknown manifest position!");
+}
+
+/// Return true if \p New is equal or worse than \p Old.
+static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
+ if (!Old.isIntAttribute())
+ return true;
+
+ return Old.getValueAsInt() >= New.getValueAsInt();
+}
+
+/// Return true if the information provided by \p Attr was added to the
+/// attribute list \p Attrs. This is only the case if it was not already present
+/// in \p Attrs at the position describe by \p MP and \p ArgNo.
+static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
+ AttributeList &Attrs,
+ AbstractAttribute::ManifestPosition MP,
+ unsigned ArgNo = 0) {
+ unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+
+ if (Attr.isEnumAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+ if (Attr.isStringAttribute()) {
+ StringRef Kind = Attr.getKindAsString();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+
+ llvm_unreachable("Expected enum or string attribute!");
+}
+
+ChangeStatus AbstractAttribute::update(Attributor &A) {
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ if (getState().isAtFixpoint())
+ return HasChanged;
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Update: " << *this << "\n");
+
+ HasChanged = updateImpl(A);
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Update " << HasChanged << " " << *this
+ << "\n");
+
+ return HasChanged;
+}
+
+ChangeStatus AbstractAttribute::manifest(Attributor &A) {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+ assert(getAssociatedValue() &&
+ "Attempted to manifest an attribute without associated value!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(DeducedAttrs);
+
+ Function &ScopeFn = getAnchorScope();
+ LLVMContext &Ctx = ScopeFn.getContext();
+ ManifestPosition MP = getManifestPosition();
+
+ AttributeList Attrs;
+ SmallVector<unsigned, 4> ArgNos;
+
+ // In the following some generic code that will manifest attributes in
+ // DeducedAttrs if they improve the current IR. Due to the different
+ // annotation positions we use the underlying AttributeList interface.
+ // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
+
+ switch (MP) {
+ case MP_ARGUMENT:
+ ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
+ Attrs = ScopeFn.getAttributes();
+ break;
+ case MP_FUNCTION:
+ case MP_RETURNED:
+ ArgNos.push_back(0);
+ Attrs = ScopeFn.getAttributes();
+ break;
+ case MP_CALL_SITE_ARGUMENT: {
+ CallSite CS(&getAnchoredValue());
+ for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
+ if (CS.getArgOperand(u) == getAssociatedValue())
+ ArgNos.push_back(u);
+ Attrs = CS.getAttributes();
+ }
+ }
+
+ for (const Attribute &Attr : DeducedAttrs) {
+ for (unsigned ArgNo : ArgNos) {
+ if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
+ continue;
+
+ HasChanged = ChangeStatus::CHANGED;
+ bookkeeping(MP, Attr);
+ }
+ }
+
+ if (HasChanged == ChangeStatus::UNCHANGED)
+ return HasChanged;
+
+ switch (MP) {
+ case MP_ARGUMENT:
+ case MP_FUNCTION:
+ case MP_RETURNED:
+ ScopeFn.setAttributes(Attrs);
+ break;
+ case MP_CALL_SITE_ARGUMENT:
+ CallSite(&getAnchoredValue()).setAttributes(Attrs);
+ }
+
+ return HasChanged;
+}
+
+Function &AbstractAttribute::getAnchorScope() {
+ Value &V = getAnchoredValue();
+ if (isa<Function>(V))
+ return cast<Function>(V);
+ if (isa<Argument>(V))
+ return *cast<Argument>(V).getParent();
+ if (isa<Instruction>(V))
+ return *cast<Instruction>(V).getFunction();
+ llvm_unreachable("No scope for anchored value found!");
+}
+
+const Function &AbstractAttribute::getAnchorScope() const {
+ return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+}
+
+/// -----------------------NoUnwind Function Attribute--------------------------
+
+struct AANoUnwindFunction : AANoUnwind, BooleanState {
+
+ AANoUnwindFunction(Function &F, InformationCache &InfoCache)
+ : AANoUnwind(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nounwind" : "may-unwind";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AANoUnwind::isAssumedNoUnwind().
+ bool isAssumedNoUnwind() const override { return getAssumed(); }
+
+ /// See AANoUnwind::isKnownNoUnwind().
+ bool isKnownNoUnwind() const override { return getKnown(); }
+};
+
+ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
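+ // Opcodes of instructions that may throw themselves or transfer control to
+ // an unwind destination.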
+ auto Opcodes = {
+ (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
+ (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ if (!I->mayThrow())
+ continue;
+
+ auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I);
+
+ if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+/// --------------------- Function Return Values -------------------------------
+
+/// "Attribute" that collects all potential returned values and the return
+/// instructions that they arise from.
+///
+/// If there is a unique returned value R, the manifest method will:
+/// - mark R with the "returned" attribute, if R is an argument.
+class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
+
+ /// Mapping of values potentially returned by the associated function to the
+ /// return instructions that might return them.
+ DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues;
+
+ /// State flags
+ ///
+ ///{
+ bool IsFixed;
+ bool IsValidState;
+ bool HasOverdefinedReturnedCalls;
+ ///}
+
+ /// Collect values that could become \p V in the set \p Values, each mapped to
+ /// \p ReturnInsts.
+ void collectValuesRecursively(
+ Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts,
+ DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) {
+
+ visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) {
+ assert(!isa<Instruction>(Val) ||
+ &getAnchorScope() == cast<Instruction>(Val)->getFunction());
+ Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end());
+ };
+
+ bool UnusedBool;
+ bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB);
+
+ // If we aborted the above traversal we have not seen all the values.
+ // Consequently, we cannot know if the information we would derive is
+ // accurate so we give up early.
+ if (!Success)
+ indicatePessimisticFixpoint();
+ }
+
+public:
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAReturnedValuesImpl(Function &F, InformationCache &InfoCache)
+ : AAReturnedValues(F, InfoCache) {
+ // We do not have an associated argument yet.
+ AssociatedVal = nullptr;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // Reset the state.
+ AssociatedVal = nullptr;
+ IsFixed = false;
+ IsValidState = true;
+ HasOverdefinedReturnedCalls = false;
+ ReturnedValues.clear();
+
+ Function &F = cast<Function>(getAnchoredValue());
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ // Look through all arguments, if one is marked as returned we are done.
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasReturnedAttr()) {
+
+ auto &ReturnInstSet = ReturnedValues[&Arg];
+ for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
+ ReturnInstSet.insert(cast<ReturnInst>(RI));
+
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ // If no argument was marked as returned we look at all return instructions
+ // and collect potentially returned values.
+ for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) {
+ SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)});
+ collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet,
+ ReturnedValues);
+ }
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override;
+
+ /// See AbstractAttribute::getState(...).
+ AbstractState &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// Return the number of potential return values, -1 if unknown.
+ size_t getNumReturnValues() const {
+ return isValidState() ? ReturnedValues.size() : -1;
+ }
+
+ /// Return an assumed unique return value if a single candidate is found. If
+ /// there cannot be one, return a nullptr. If it is not clear yet, return the
+ /// Optional::NoneType.
+ Optional<Value *> getAssumedUniqueReturnValue() const;
+
+ /// See AAReturnedValues::checkForallReturnedValues(...).
+ bool
+ checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override;
+
+ /// Pretty print the attribute similar to the IR representation.
+ const std::string getAsStr() const override;
+
+ /// See AbstractState::isAtFixpoint().
+ bool isAtFixpoint() const override { return IsFixed; }
+
+ /// See AbstractState::isValidState().
+ bool isValidState() const override { return IsValidState; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...).
+ void indicateOptimisticFixpoint() override {
+ IsFixed = true;
+ IsValidState &= true;
+ }
+ void indicatePessimisticFixpoint() override {
+ IsFixed = true;
+ IsValidState = false;
+ }
+};
+
+ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ // Bookkeeping.
+ assert(isValidState());
+ NumFnKnownReturns++;
+
+ // Check if we have an assumed unique return value that we could manifest.
+ Optional<Value *> UniqueRV = getAssumedUniqueReturnValue();
+
+ if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+ return Changed;
+
+ // Bookkeeping.
+ NumFnUniqueReturned++;
+
+ // If the assumed unique return value is an argument, annotate it.
+ if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+ AssociatedVal = UniqueRVArg;
+ Changed = AbstractAttribute::manifest(A) | Changed;
+ }
+
+ return Changed;
+}
+
+const std::string AAReturnedValuesImpl::getAsStr() const {
+ return (isAtFixpoint() ? "returns(#" : "may-return(#") +
+ (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+}
+
+Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
+ // If checkForallReturnedValues provides a unique value, ignoring potential
+ // undef values that can also be present, it is assumed to be the actual
+ // return value and forwarded to the caller of this method. If there are
+ // multiple, a nullptr is returned indicating there cannot be a unique
+ // returned value.
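+ //
+ // For example, if a function returns either its argument %a or an undef value
+ // depending on control flow, %a is reported as the assumed unique return
+ // value because the undef can be assumed to equal %a.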
+ Optional<Value *> UniqueRV;
+
+ std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ // If we found a second returned value and neither the current nor the saved
+ // one is an undef, there is no unique returned value. Undefs are special
+ // since we can pretend they have any value.
+ if (UniqueRV.hasValue() && UniqueRV != &RV &&
+ !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) {
+ UniqueRV = nullptr;
+ return false;
+ }
+
+ // Do not overwrite a value with an undef.
+ if (!UniqueRV.hasValue() || !isa<UndefValue>(RV))
+ UniqueRV = &RV;
+
+ return true;
+ };
+
+ if (!checkForallReturnedValues(Pred))
+ UniqueRV = nullptr;
+
+ return UniqueRV;
+}
+
+bool AAReturnedValuesImpl::checkForallReturnedValues(
+ std::function<bool(Value &)> &Pred) const {
+ if (!isValidState())
+ return false;
+
+ // Check all returned values but ignore call sites as long as we have not
+ // encountered an overdefined one during an update.
+ for (auto &It : ReturnedValues) {
+ Value *RV = It.first;
+
+ ImmutableCallSite ICS(RV);
+ if (ICS && !HasOverdefinedReturnedCalls)
+ continue;
+
+ if (!Pred(*RV))
+ return false;
+ }
+
+ return true;
+}
+
+ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+
+ // Check if we know of any values returned by the associated function,
+ // if not, we are done.
+ if (getNumReturnValues() == 0) {
+ indicateOptimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Check if any of the returned values is a call site we can refine.
+ decltype(ReturnedValues) AddRVs;
+ bool HasCallSite = false;
+
+ // Look at all returned call sites.
+ for (auto &It : ReturnedValues) {
+ SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second;
+ Value *RV = It.first;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV
+ << "\n");
+
+ // Only call sites can change during an update, ignore the rest.
+ CallSite RetCS(RV);
+ if (!RetCS)
+ continue;
+
+ // For now, any call site we see will prevent us from directly fixing the
+ // state. However, if the information on the callees is fixed, the call
+ // sites will be removed and we will fix the information for this state.
+ HasCallSite = true;
+
+ // Look up the returned-values information for the returned call site.
+ auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
+ if (!RetCSAA) {
+ HasOverdefinedReturnedCalls = true;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
+ << ") with " << (RetCSAA ? "invalid" : "no")
+ << " associated state\n");
+ continue;
+ }
+
+ // Try to find an assumed unique return value for the called function.
+ Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue();
+
+ // If no assumed unique return value was found due to the lack of
+ // candidates, we may need to resolve more calls (through more update
+ // iterations) or the called function will not return. Either way, we simply
+ // stick with the call sites as return values. Because there were not
+ // multiple possibilities, we do not treat it as overdefined.
+ if (!AssumedUniqueRV.hasValue())
+ continue;
+
+ // If multiple, non-refinable values were found, there cannot be a unique
+ // return value for the called function. The returned call is overdefined!
+ if (!AssumedUniqueRV.getValue()) {
+ HasOverdefinedReturnedCalls = true;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple "
+ "potentially returned values\n");
+ continue;
+ }
+
+ LLVM_DEBUG({
+ bool UniqueRVIsKnown = RetCSAA->isAtFixpoint();
+ dbgs() << "[AAReturnedValues] Returned call site "
+ << (UniqueRVIsKnown ? "known" : "assumed")
+ << " unique return value: " << *AssumedUniqueRV << "\n";
+ });
+
+ // The assumed unique return value.
+ Value *AssumedRetVal = AssumedUniqueRV.getValue();
+
+ // If the assumed unique return value is an argument, look up the matching
+ // call site operand and recursively collect new returned values.
+ // If it is not an argument, it is just put into the set of returned values
+ // as we would have already looked through casts, phis, and similar values.
+ if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal))
+ collectValuesRecursively(A,
+ RetCS.getArgOperand(AssumedRetArg->getArgNo()),
+ ReturnInsts, AddRVs);
+ else
+ AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end());
+ }
+
+ // Keep track of any change to trigger updates on dependent attributes.
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ for (auto &It : AddRVs) {
+ assert(!It.second.empty() && "Entry does not add anything.");
+ auto &ReturnInsts = ReturnedValues[It.first];
+ for (ReturnInst *RI : It.second)
+ if (ReturnInsts.insert(RI).second) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
+ << *It.first << " => " << *RI << "\n");
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ // If there is no call site in the returned values we are done.
+ if (!HasCallSite) {
+ indicateOptimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+}
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncFunction : AANoSync, BooleanState {
+
+ AANoSyncFunction(Function &F, InformationCache &InfoCache)
+ : AANoSync(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nosync" : "may-sync";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AANoSync::isAssumedNoSync()
+ bool isAssumedNoSync() const override { return getAssumed(); }
+
+ /// See AANoSync::isKnownNoSync()
+ bool isKnownNoSync() const override { return getKnown(); }
+
+ /// Helper function used to determine whether an instruction is a non-relaxed
+ /// atomic, i.e., an atomic instruction whose ordering is stronger than
+ /// unordered or monotonic.
+ static bool isNonRelaxedAtomic(Instruction *I);
+
+ /// Helper function used to determine whether an instruction is volatile.
+ static bool isVolatile(Instruction *I);
+
+ /// Helper function used to check whether an intrinsic (currently memcpy,
+ /// memmove, and memset) is nosync.
+ static bool isNoSyncIntrinsic(Instruction *I);
+};
+
+bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+ if (!I->isAtomic())
+ return false;
+
+ AtomicOrdering Ordering;
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ Ordering = cast<AtomicRMWInst>(I)->getOrdering();
+ break;
+ case Instruction::Store:
+ Ordering = cast<StoreInst>(I)->getOrdering();
+ break;
+ case Instruction::Load:
+ Ordering = cast<LoadInst>(I)->getOrdering();
+ break;
+ case Instruction::Fence: {
+ auto *FI = cast<FenceInst>(I);
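+ // A fence with single-thread scope cannot synchronize with other threads
+ // and is therefore not treated as a non-relaxed atomic.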
+ if (FI->getSyncScopeID() == SyncScope::SingleThread)
+ return false;
+ Ordering = FI->getOrdering();
+ break;
+ }
+ case Instruction::AtomicCmpXchg: {
+ AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
+ AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
+ // Only if both the success and the failure ordering are relaxed can the
+ // instruction be treated as relaxed; otherwise it is non-relaxed.
+ if (Success != AtomicOrdering::Unordered &&
+ Success != AtomicOrdering::Monotonic)
+ return true;
+ if (Failure != AtomicOrdering::Unordered &&
+ Failure != AtomicOrdering::Monotonic)
+ return true;
+ return false;
+ }
+ default:
+ llvm_unreachable(
+ "New atomic operations need to be known in the attributor.");
+ }
+
+ // Relaxed.
+ if (Ordering == AtomicOrdering::Unordered ||
+ Ordering == AtomicOrdering::Monotonic)
+ return false;
+ return true;
+}
+
+/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
+/// FIXME: We should improve the handling of intrinsics.
+bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ /// Element-wise atomic memory intrinsics can only be unordered and are
+ /// therefore nosync.
+ case Intrinsic::memset_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ return true;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ if (!cast<MemIntrinsic>(II)->isVolatile())
+ return true;
+ return false;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+bool AANoSyncFunction::isVolatile(Instruction *I) {
+ assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
+ "Calls should not be checked here");
+
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->isVolatile();
+ case Instruction::Store:
+ return cast<StoreInst>(I)->isVolatile();
+ case Instruction::Load:
+ return cast<LoadInst>(I)->isVolatile();
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->isVolatile();
+ default:
+ return false;
+ }
+}
+
+ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ /// We are looking for volatile instructions or non-relaxed atomics.
+ /// FIXME: We should improve the handling of intrinsics.
+ for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
+ ImmutableCallSite ICS(I);
+ auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+
+ if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
+ continue;
+
+ if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
+ !ICS.hasFnAttr(Attribute::NoSync)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ if (ICS)
+ continue;
+
+ if (!isVolatile(I) && !isNonRelaxedAtomic(I))
+ continue;
+
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call};
+
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ // At this point we handled all read/write effects and they are all
+ // nosync, so they can be skipped.
+ if (I->mayReadOrWriteMemory())
+ continue;
+
+ ImmutableCallSite ICS(I);
+
+ // non-convergent and readnone imply nosync.
+ if (!ICS.isConvergent())
+ continue;
+
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeFunction : AbstractAttribute, BooleanState {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANoFreeFunction(Function &F, InformationCache &InfoCache)
+ : AbstractAttribute(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ ///{
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ ///}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nofree" : "may-free";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::getAttrKind().
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ /// Return true if "nofree" is assumed.
+ bool isAssumedNoFree() const { return getAssumed(); }
+
+ /// Return true if "nofree" is known.
+ bool isKnownNoFree() const { return getKnown(); }
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+};
+
+ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ for (unsigned Opcode :
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call}) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+
+ auto ICS = ImmutableCallSite(I);
+ auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
+
+ if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
+ !ICS.hasFnAttr(Attribute::NoFree)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+struct AANonNullImpl : AANonNull, BooleanState {
+
+ AANonNullImpl(Value &V, InformationCache &InfoCache)
+ : AANonNull(V, InfoCache) {}
+
+ AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
+ InformationCache &InfoCache)
+ : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
+ /// See AANonNull::isAssumedNonNull().
+ bool isAssumedNonNull() const override { return getAssumed(); }
+
+ /// See AANonNull::isKnownNonNull().
+ bool isKnownNonNull() const override { return getKnown(); }
+
+ /// Generate a predicate that checks if a given value is assumed nonnull.
+ /// The generated function returns true if a value satisfies any of
+ /// the following conditions:
+ /// (i) A value is known to be nonzero (=nonnull).
+ /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
+ /// true.
+ std::function<bool(Value &)> generatePredicate(Attributor &);
+};
+
+std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
+ // FIXME: The `AAReturnedValues` should provide the predicate with the
+ // `ReturnInst` vector as well such that we can use the control flow sensitive
+ // version of `isKnownNonZero`. This should fix `test11` in
+ // `test/Transforms/FunctionAttrs/nonnull.ll`
+
+ std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
+ return true;
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+
+ ImmutableCallSite ICS(&RV);
+
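+ // The value is assumed nonnull if an associated AANonNull assumes it to be
+ // nonnull or if it is the result of a call that already carries a nonnull
+ // return attribute.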
+ if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
+ (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
+ return false;
+
+ return true;
+ };
+
+ return Pred;
+}
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned : AANonNullImpl {
+
+ AANonNullReturned(Function &F, InformationCache &InfoCache)
+ : AANonNullImpl(F, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Function &F = getAnchorScope();
+
+ // Already nonnull.
+ if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::NonNull))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
+ if (!AARetVal) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ std::function<bool(Value &)> Pred = this->generatePredicate(A);
+ if (!AARetVal->checkForallReturnedValues(Pred)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument : AANonNullImpl {
+
+ AANonNullArgument(Argument &A, InformationCache &InfoCache)
+ : AANonNullImpl(A, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Argument *Arg = cast<Argument>(getAssociatedValue());
+ if (Arg->hasNonNullAttr())
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+/// NonNull attribute for a call site argument.
+struct AANonNullCallSiteArgument : AANonNullImpl {
+
+ /// See AANonNullImpl::AANonNullImpl(...).
+ AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
+ InformationCache &InfoCache)
+ : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
+ ArgNo(ArgNo) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ CallSite CS(&getAnchoredValue());
+ if (isKnownNonZero(getAssociatedValue(),
+ getAnchorScope().getParent()->getDataLayout()) ||
+ CS.paramHasAttr(ArgNo, getAttrKind()))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override {
+ return MP_CALL_SITE_ARGUMENT;
+ }
+
+ // Return argument index of associated value.
+ int getArgNo() const { return ArgNo; }
+
+private:
+ unsigned ArgNo;
+};
+
+ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+ Argument &Arg = cast<Argument>(getAnchoredValue());
+
+ unsigned ArgNo = Arg.getArgNo();
+
+ // Callback that checks, for a single call site, whether the argument is
+ // known or assumed nonnull at that position.
+ std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
+ assert(CS && "Sanity check: Call site was not initialized properly!");
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+
+ // Check that NonNullAA is AANonNullCallSiteArgument.
+ if (NonNullAA) {
+ ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
+ if (ICS && CS.getInstruction() == ICS.getInstruction())
+ return NonNullAA->isAssumedNonNull();
+ return false;
+ }
+
+ if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+ return true;
+
+ Value *V = CS.getArgOperand(ArgNo);
+ if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+ return true;
+
+ return false;
+ };
+ if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
+ // NOTE: Never look at the argument of the callee in this method.
+ // If we do this, "nonnull" is always deduced because of the assumption.
+
+ Value &V = *getAssociatedValue();
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+
+ if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAWillReturnImpl(Function &F, InformationCache &InfoCache)
+ : AAWillReturn(F, InfoCache) {}
+
+ /// See AAWillReturn::isKnownWillReturn().
+ bool isKnownWillReturn() const override { return getKnown(); }
+
+ /// See AAWillReturn::isAssumedWillReturn().
+ bool isAssumedWillReturn() const override { return getAssumed(); }
+
+ /// See AbstractAttribute::getState(...).
+ AbstractState &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "willreturn" : "may-noreturn";
+ }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAWillReturnFunction(Function &F, InformationCache &InfoCache)
+ : AAWillReturnImpl(F, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override {
+ return MP_FUNCTION;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override;
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficient code
+bool containsCycle(Function &F) {
+ SmallPtrSet<BasicBlock *, 32> Visited;
+
+ // Traverse BB by dfs and check whether successor is already visited.
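+ // Note that an already visited successor is not necessarily the target of a
+ // back edge (e.g. the join block of a diamond), so this check is conservative
+ // and may report a cycle for an acyclic CFG. That is safe here as it only
+ // makes the deduction more pessimistic.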
+ for (BasicBlock *BB : depth_first(&F)) {
+ Visited.insert(BB);
+ for (auto *SuccBB : successors(BB)) {
+ if (Visited.count(SuccBB))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as an endless loop for now.
+// We have to allow some patterns.
+bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+
+void AAWillReturnFunction::initialize(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ if (containsPossiblyEndlessLoop(F))
+ indicatePessimisticFixpoint();
+}
+
+ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ for (unsigned Opcode :
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call}) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ auto ICS = ImmutableCallSite(I);
+
+ if (ICS.hasFnAttr(Attribute::WillReturn))
+ continue;
+
+ auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
+ if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+
+ // FIXME: (i) Prohibit any recursion for now.
+ // (ii) AANoRecurse isn't implemented yet so currently any call is
+ // regarded as having recursion.
+ // Code below should be
+ // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
+ if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ----------------------------------------------------------------------------
+/// Attributor
+/// ----------------------------------------------------------------------------
+
+bool Attributor::checkForAllCallSites(Function &F,
+ std::function<bool(CallSite)> &Pred,
+ bool RequireAllCallSites) {
+ // We can try to determine information from the call sites. However, this is
+ // only possible if all call sites are known, which in turn requires the
+ // function to have internal linkage.
+ if (RequireAllCallSites && !F.hasInternalLinkage()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Attributor: Function " << F.getName()
+ << " has no internal linkage, hence not all call sites are known\n");
+ return false;
+ }
+
+ for (const Use &U : F.uses()) {
+
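+ // A use is only a valid call site if the user is a call or invoke that
+ // actually calls F (rather than passing it as an argument) and the caller
+ // has an exact definition we can inspect.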
+ CallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+ if (!RequireAllCallSites)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
+ << " is an invalid use of " << F.getName() << "\n");
+ return false;
+ }
+
+ if (Pred(CS))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
+ << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ return true;
+}
+
+ChangeStatus Attributor::run() {
+ // Initialize all abstract attributes.
+ for (AbstractAttribute *AA : AllAbstractAttributes)
+ AA->initialize(*this);
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
+ << AllAbstractAttributes.size()
+ << " abstract attributes.\n");
+
+ // Now that all abstract attributes are collected and initialized we start
+ // the abstract analysis.
+
+ unsigned IterationCounter = 1;
+
+ SmallVector<AbstractAttribute *, 64> ChangedAAs;
+ SetVector<AbstractAttribute *> Worklist;
+ Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+
+ do {
+ LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
+ << ", Worklist size: " << Worklist.size() << "\n");
+
+ // Add all abstract attributes that are potentially dependent on one that
+ // changed to the work list.
+ for (AbstractAttribute *ChangedAA : ChangedAAs) {
+ auto &QuerriedAAs = QueryMap[ChangedAA];
+ Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
+ }
+
+ // Reset the changed set.
+ ChangedAAs.clear();
+
+ // Update all abstract attributes in the work list and record the ones that
+ // changed.
+ for (AbstractAttribute *AA : Worklist)
+ if (AA->update(*this) == ChangeStatus::CHANGED)
+ ChangedAAs.push_back(AA);
+
+ // Reset the work list and repopulate with the changed abstract attributes.
+ // Note that dependent ones are added above.
+ Worklist.clear();
+ Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+
+ } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
+ << IterationCounter << "/" << MaxFixpointIterations
+ << " iterations\n");
+
+ bool FinishedAtFixpoint = Worklist.empty();
+
+ // Reset abstract attributes not settled in a sound fixpoint by now. This
+ // happens when we stopped the fixpoint iteration early. Note that only the
+ // ones marked as "changed" *and* the ones transitively depending on them
+ // need to be reverted to a pessimistic state. Others might not be in a
+ // fixpoint state but we can use the optimistic results for them anyway.
+ SmallPtrSet<AbstractAttribute *, 32> Visited;
+ for (unsigned u = 0; u < ChangedAAs.size(); u++) {
+ AbstractAttribute *ChangedAA = ChangedAAs[u];
+ if (!Visited.insert(ChangedAA).second)
+ continue;
+
+ AbstractState &State = ChangedAA->getState();
+ if (!State.isAtFixpoint()) {
+ State.indicatePessimisticFixpoint();
+
+ NumAttributesTimedOut++;
+ }
+
+ auto &QuerriedAAs = QueryMap[ChangedAA];
+ ChangedAAs.append(QuerriedAAs.begin(), QuerriedAAs.end());
+ }
+
+ LLVM_DEBUG({
+ if (!Visited.empty())
+ dbgs() << "\n[Attributor] Finalized " << Visited.size()
+ << " abstract attributes.\n";
+ });
+
+ unsigned NumManifested = 0;
+ unsigned NumAtFixpoint = 0;
+ ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
+ for (AbstractAttribute *AA : AllAbstractAttributes) {
+ AbstractState &State = AA->getState();
+
+ // If a fixpoint was not already reached, we can now take the
+ // optimistic state. This is correct because we enforced a pessimistic one
+ // on abstract attributes that were transitively dependent on a changed one
+ // already above.
+ if (!State.isAtFixpoint())
+ State.indicateOptimisticFixpoint();
+
+ // If the state is invalid, we do not try to manifest it.
+ if (!State.isValidState())
+ continue;
+
+ // Manifest the state and record if we changed the IR.
+ ChangeStatus LocalChange = AA->manifest(*this);
+ ManifestChange = ManifestChange | LocalChange;
+
+ NumAtFixpoint++;
+ NumManifested += (LocalChange == ChangeStatus::CHANGED);
+ }
+
+ (void)NumManifested;
+ (void)NumAtFixpoint;
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Manifested " << NumManifested
+ << " arguments while " << NumAtFixpoint
+ << " were in a valid fixpoint state\n");
+
+ // If verification is requested, we finished this run at a fixpoint, and the
+ // IR was changed, we re-run the whole fixpoint analysis, starting with a
+ // re-initialization of the abstract attributes. This re-run should not result
+ // in an IR change. However, the (virtual) state of attributes after the re-run
+ // might be more optimistic than the known state or the IR state if the better
+ // state cannot be manifested.
+ if (VerifyAttributor && FinishedAtFixpoint &&
+ ManifestChange == ChangeStatus::CHANGED) {
+ VerifyAttributor = false;
+ ChangeStatus VerifyStatus = run();
+ if (VerifyStatus != ChangeStatus::UNCHANGED)
+ llvm_unreachable(
+ "Attributor verification failed, re-run did result in an IR change "
+ "even after a fixpoint was reached in the original run. (False "
+ "positives possible!)");
+ VerifyAttributor = true;
+ }
+
+ NumAttributesManifested += NumManifested;
+ NumAttributesValidFixpoint += NumAtFixpoint;
+
+ return ManifestChange;
+}
+
+void Attributor::identifyDefaultAbstractAttributes(
+ Function &F, InformationCache &InfoCache,
+ DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+
+ // Every function can be nounwind.
+ registerAA(*new AANoUnwindFunction(F, InfoCache));
+
+ // Every function might be marked "nosync"
+ registerAA(*new AANoSyncFunction(F, InfoCache));
+
+ // Every function might be "no-free".
+ registerAA(*new AANoFreeFunction(F, InfoCache));
+
+ // Return attributes are only appropriate if the return type is non void.
+ Type *ReturnType = F.getReturnType();
+ if (!ReturnType->isVoidTy()) {
+ // Argument attribute "returned" --- Create only one per function even
+ // though it is an argument attribute.
+ if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
+ registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+
+ // Every function with pointer return type might be marked nonnull.
+ if (ReturnType->isPointerTy() &&
+ (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
+ registerAA(*new AANonNullReturned(F, InfoCache));
+ }
+
+ // Every argument with pointer type might be marked nonnull.
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy())
+ registerAA(*new AANonNullArgument(Arg, InfoCache));
+ }
+
+ // Every function might be "will-return".
+ registerAA(*new AAWillReturnFunction(F, InfoCache));
+
+ // Walk all instructions to find more attribute opportunities and also
+ // interesting instructions that might be queried by abstract attributes
+ // during their initialization or update.
+ auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
+ auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
+
+ for (Instruction &I : instructions(&F)) {
+ bool IsInterestingOpcode = false;
+
+ // To allow easy access to all instructions in a function with a given
+ // opcode we store them in the InfoCache. As not all opcodes are interesting
+ // to concrete attributes we only cache the ones that are, as identified in
+ // the following switch.
+ // Note: The set of interesting opcodes will need to grow as more concrete
+ // attributes are added.
+ switch (I.getOpcode()) {
+ default:
+ assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
+ "New call site/base instruction type needs to be known int the "
+ "attributor.");
+ break;
+ case Instruction::Call:
+ case Instruction::CallBr:
+ case Instruction::Invoke:
+ case Instruction::CleanupRet:
+ case Instruction::CatchSwitch:
+ case Instruction::Resume:
+ case Instruction::Ret:
+ IsInterestingOpcode = true;
+ }
+ if (IsInterestingOpcode)
+ InstOpcodeMap[I.getOpcode()].push_back(&I);
+ if (I.mayReadOrWriteMemory())
+ ReadOrWriteInsts.push_back(&I);
+
+ CallSite CS(&I);
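+ // For direct call sites, seed a nonnull abstract attribute for every
+ // pointer argument of the callee.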
+ if (CS && CS.getCalledFunction()) {
+ for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+ if (!CS.getArgument(i)->getType()->isPointerTy())
+ continue;
+
+ // Call site argument attribute "non-null".
+ registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+ }
+ }
+ }
+}
+
+/// Helpers to ease debugging through output streams and print calls.
+///
+///{
+raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
+ return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ AbstractAttribute::ManifestPosition AP) {
+ switch (AP) {
+ case AbstractAttribute::MP_ARGUMENT:
+ return OS << "arg";
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ return OS << "cs_arg";
+ case AbstractAttribute::MP_FUNCTION:
+ return OS << "fn";
+ case AbstractAttribute::MP_RETURNED:
+ return OS << "fn_ret";
+ }
+ llvm_unreachable("Unknown attribute position!");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
+ return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
+ AA.print(OS);
+ return OS;
+}
+
+void AbstractAttribute::print(raw_ostream &OS) const {
+ OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
+ << AnchoredVal.getName() << "]";
+}
+///}
+
+/// ----------------------------------------------------------------------------
+/// Pass (Manager) Boilerplate
+/// ----------------------------------------------------------------------------
+
+static bool runAttributorOnModule(Module &M) {
+ if (DisableAttributor)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size()
+ << " functions.\n");
+
+ // Create an Attributor and initially empty information cache that is filled
+ // while we identify default attribute opportunities.
+ Attributor A;
+ InformationCache InfoCache;
+
+ for (Function &F : M) {
+ // TODO: Not all attributes require an exact definition. Find a way to
+ // enable deduction for some but not all attributes in case the
+ // definition might be changed at runtime, see also
+ // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+ // TODO: We could always determine abstract attributes and if sufficient
+ // information was found we could duplicate the functions that do not
+ // have an exact definition.
+ if (!F.hasExactDefinition()) {
+ NumFnWithoutExactDefinition++;
+ continue;
+ }
+
+ // For now we ignore naked and optnone functions.
+ if (F.hasFnAttribute(Attribute::Naked) ||
+ F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+
+ NumFnWithExactDefinition++;
+
+ // Populate the Attributor with abstract attribute opportunities in the
+ // function and the information cache with IR information.
+ A.identifyDefaultAbstractAttributes(F, InfoCache);
+ }
+
+ return A.run() == ChangeStatus::CHANGED;
+}
+
+PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (runAttributorOnModule(M)) {
+ // FIXME: Think about passes we will preserve and add them here.
+ return PreservedAnalyses::none();
+ }
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct AttributorLegacyPass : public ModulePass {
+ static char ID;
+
+ AttributorLegacyPass() : ModulePass(ID) {
+ initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ return runAttributorOnModule(M);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: Think about passes we will preserve and add them here.
+ AU.setPreservesCFG();
+ }
+};
+
+} // end anonymous namespace
+
+Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+
+char AttributorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
+ "Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
+ "Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
index 05fc3dd6950c..6b68aa90c567 100644
--- a/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -1,9 +1,8 @@
//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index ff5ee817da49..6c365f3f3cbe 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -1,9 +1,8 @@
//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+
using namespace llvm;
#define DEBUG_TYPE "block-extractor"
@@ -36,22 +36,48 @@ static cl::opt<std::string> BlockExtractorFile(
cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
cl::desc("Erase the existing functions"),
cl::Hidden);
-
namespace {
class BlockExtractor : public ModulePass {
- SmallVector<BasicBlock *, 16> Blocks;
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
bool EraseFunctions;
- SmallVector<std::pair<std::string, std::string>, 32> BlocksByName;
+ /// Map a function name to groups of blocks.
+ SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
+ BlocksByName;
+
+ void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract) {
+ for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
+ GroupsOfBlocksToExtract) {
+ SmallVector<BasicBlock *, 16> NewGroup;
+ NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
+ GroupsOfBlocks.emplace_back(NewGroup);
+ }
+ if (!BlockExtractorFile.empty())
+ loadFile();
+ }
public:
static char ID;
BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
bool EraseFunctions)
- : ModulePass(ID), Blocks(BlocksToExtract.begin(), BlocksToExtract.end()),
- EraseFunctions(EraseFunctions) {
- if (!BlockExtractorFile.empty())
- loadFile();
+ : ModulePass(ID), EraseFunctions(EraseFunctions) {
+ // We want one group per element of the input list.
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
+ for (BasicBlock *BB : BlocksToExtract) {
+ SmallVector<BasicBlock *, 16> NewGroup;
+ NewGroup.push_back(BB);
+ MassagedGroupsOfBlocks.push_back(NewGroup);
+ }
+ init(MassagedGroupsOfBlocks);
}
+
+ BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), EraseFunctions(EraseFunctions) {
+ init(GroupsOfBlocksToExtract);
+ }
+
BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {}
bool runOnModule(Module &M) override;
@@ -70,6 +96,12 @@ ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
return new BlockExtractor(BlocksToExtract, EraseFunctions);
}
+ModulePass *llvm::createBlockExtractorPass(
+ const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions) {
+ return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions);
+}
/// Gets all of the blocks specified in the input file.
void BlockExtractor::loadFile() {
@@ -82,8 +114,17 @@ void BlockExtractor::loadFile() {
Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
for (const auto &Line : Lines) {
- auto FBPair = Line.split(' ');
- BlocksByName.push_back({FBPair.first, FBPair.second});
+ SmallVector<StringRef, 4> LineSplit;
+ Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ if (LineSplit.empty())
+ continue;
+ SmallVector<StringRef, 4> BBNames;
+ LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ if (BBNames.empty())
+ report_fatal_error("Missing bbs name");
+ BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}});
}
}
@@ -130,33 +171,46 @@ bool BlockExtractor::runOnModule(Module &M) {
}
// Get all the blocks specified in the input file.
+ unsigned NextGroupIdx = GroupsOfBlocks.size();
+ GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
for (const auto &BInfo : BlocksByName) {
Function *F = M.getFunction(BInfo.first);
if (!F)
report_fatal_error("Invalid function name specified in the input file");
- auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
- return BB.getName().equals(BInfo.second);
- });
- if (Res == F->end())
- report_fatal_error("Invalid block name specified in the input file");
- Blocks.push_back(&*Res);
+ for (const auto &BBInfo : BInfo.second) {
+ auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
+ return BB.getName().equals(BBInfo);
+ });
+ if (Res == F->end())
+ report_fatal_error("Invalid block name specified in the input file");
+ GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
+ }
+ ++NextGroupIdx;
}
- // Extract basic blocks.
- for (BasicBlock *BB : Blocks) {
- // Check if the module contains BB.
- if (BB->getParent()->getParent() != &M)
- report_fatal_error("Invalid basic block");
- LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
- << BB->getParent()->getName() << ":" << BB->getName()
- << "\n");
- SmallVector<BasicBlock *, 2> BlocksToExtractVec;
- BlocksToExtractVec.push_back(BB);
- if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- BlocksToExtractVec.push_back(II->getUnwindDest());
- CodeExtractor(BlocksToExtractVec).extractCodeRegion();
- ++NumExtracted;
- Changed = true;
+ // Extract each group of basic blocks.
+ for (auto &BBs : GroupsOfBlocks) {
+ SmallVector<BasicBlock *, 32> BlocksToExtractVec;
+ for (BasicBlock *BB : BBs) {
+ // Check if the module contains BB.
+ if (BB->getParent()->getParent() != &M)
+ report_fatal_error("Invalid basic block");
+ LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
+ << BB->getParent()->getName() << ":" << BB->getName()
+ << "\n");
+ BlocksToExtractVec.push_back(BB);
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ ++NumExtracted;
+ Changed = true;
+ }
+ Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ if (F)
+ LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
+ << "' in: " << F->getName() << '\n');
+ else
+ LLVM_DEBUG(dbgs() << "Failed to extract for group '"
+ << (*BBs.begin())->getName() << "'\n");
}
// Erase the functions.
diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp
index de62cfc0c1db..20cb3213628e 100644
--- a/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -1,9 +1,8 @@
//===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 81f3634eaf28..ad877ae1786c 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -1,9 +1,8 @@
//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,16 @@ static unsigned getAlignment(GlobalVariable *GV) {
return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
+static bool
+isUnmergeableGlobal(GlobalVariable *GV,
+ const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
+ // Only process constants with initializers in the default address space.
+ return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV);
+}
+
enum class CanMerge { No, Yes };
static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
@@ -155,11 +164,7 @@ static bool mergeConstants(Module &M) {
continue;
}
- // Only process constants with initializers in the default address space.
- if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
- // Don't touch values marked with attribute(used).
- UsedGlobals.count(GV))
+ if (isUnmergeableGlobal(GV, UsedGlobals))
continue;
// This transformation is legal for weak ODR globals in the sense it
@@ -197,11 +202,7 @@ static bool mergeConstants(Module &M) {
GVI != E; ) {
GlobalVariable *GV = &*GVI++;
- // Only process constants with initializers in the default address space.
- if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
- // Don't touch values marked with attribute(used).
- UsedGlobals.count(GV))
+ if (isUnmergeableGlobal(GV, UsedGlobals))
continue;
// We can only replace constant with local linkage.
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 666f6cc37bfd..e30b33aa4872 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -1,9 +1,8 @@
//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,10 +105,10 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
}
LLVMContext &Ctx = M.getContext();
- Constant *C = M.getOrInsertFunction(
+ FunctionCallee C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
- Function *F = dyn_cast<Function>(C);
+ Function *F = dyn_cast<Function>(C.getCallee());
// Take over the existing function. The frontend emits a weak stub so that the
// linker knows about the symbol; this pass replaces the function body.
F->deleteBody();
@@ -133,9 +132,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
IRBuilder<> IRBFail(TrapBB);
- Constant *CFICheckFailFn = M.getOrInsertFunction(
- "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
- Type::getInt8PtrTy(Ctx));
+ FunctionCallee CFICheckFailFn =
+ M.getOrInsertFunction("__cfi_check_fail", Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index cb30e8f46a54..968a13110b16 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -1,9 +1,8 @@
//===- DeadArgumentElimination.cpp - Eliminate dead arguments -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -939,7 +938,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, OpBundles, "", Call->getParent());
} else {
- NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ NewCS = CallInst::Create(NFTy, NF, Args, OpBundles, "", Call);
cast<CallInst>(NewCS.getInstruction())
->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index d5fef59286dd..fc52db562c62 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,9 +1,8 @@
//===- ElimAvailExtern.cpp - DCE unreachable internal functions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index a744d7f2d2d9..f77b528fc42d 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -1,9 +1,8 @@
//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 4dc1529ddbf5..b38cb6d0ed3f 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -58,6 +57,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
.Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
.Case("sanitize_memory", Attribute::SanitizeMemory)
.Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("sanitize_memtag", Attribute::SanitizeMemTag)
.Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
.Case("ssp", Attribute::StackProtect)
.Case("sspreq", Attribute::StackProtectReq)
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 4e2a82b56eec..5ccd8bc4b0fb 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -76,6 +76,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
+STATISTIC(NumNoFree, "Number of functions marked as nofree");
// FIXME: This is disabled by default to avoid exposing security vulnerabilities
// in C/C++ code compiled by clang:
@@ -89,6 +90,10 @@ static cl::opt<bool> DisableNoUnwindInference(
"disable-nounwind-inference", cl::Hidden,
cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
+static cl::opt<bool> DisableNoFreeInference(
+ "disable-nofree-inference", cl::Hidden,
+ cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -256,12 +261,15 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
}
}
+ // If the SCC contains both functions that read and functions that write, then
+ // we cannot add readonly attributes.
+ if (ReadsMemory && WritesMemory)
+ return false;
+
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- assert(!(ReadsMemory && WritesMemory) &&
- "Function marked read-only and write-only");
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
@@ -1228,6 +1236,25 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
}
+/// Helper for NoFree inference predicate InstrBreaksAttribute.
+static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
+ CallSite CS(&I);
+ if (!CS)
+ return false;
+
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ return true;
+
+ if (Callee->doesNotFreeMemory())
+ return false;
+
+ if (SCCNodes.count(Callee) > 0)
+ return false;
+
+ return true;
+}
+
/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
@@ -1281,6 +1308,29 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
},
/* RequiresExactDefinition= */ true});
+ if (!DisableNoFreeInference)
+ // Request to infer nofree attribute for all the functions in the SCC if
+ // every callsite within the SCC does not directly or indirectly free
+ // memory (except for calls to functions within the SCC). Note that nofree
+ // attribute suffers from derefinement - results may change depending on
+ // how functions are optimized. Thus it can be inferred only from exact
+ // definitions.
+ AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+ Attribute::NoFree,
+ // Skip functions known not to free memory.
+ [](const Function &F) { return F.doesNotFreeMemory(); },
+ // Instructions that break non-deallocating assumption.
+ [SCCNodes](Instruction &I) {
+ return InstrBreaksNoFree(I, SCCNodes);
+ },
+ [](Function &F) {
+ LLVM_DEBUG(dbgs()
+ << "Adding nofree attr to fn " << F.getName() << "\n");
+ F.setDoesNotFreeMemory();
+ ++NumNoFree;
+ },
+ /* RequiresExactDefinition= */ true});
+
// Perform all the requested attribute inference actions.
return AI.run(SCCNodes);
}
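
Once this inference has run, downstream code can consult the result through the ordinary Function accessors rather than re-deriving it. A hedged sketch of such a query, assuming a CallBase obtained elsewhere:

    // Hedged sketch: a client querying the nofree result inferred above.
    // Function::doesNotFreeMemory() is a wrapper over Attribute::NoFree.
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    static bool callCannotFree(const CallBase &CB) {
      const Function *Callee = CB.getCalledFunction();
      return Callee && Callee->doesNotFreeMemory();
    }
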
@@ -1301,7 +1351,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
return false;
Function *F = *SCCNodes.begin();
- if (!F || F->isDeclaration() || F->doesNotRecurse())
+ if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return false;
// If all of the calls in F are identifiable and are to norecurse functions, F
@@ -1323,7 +1373,8 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes,
+ AARGetterT &&AARGetter,
bool HasUnknownCall) {
bool Changed = false;
@@ -1367,8 +1418,7 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
bool HasUnknownCall = false;
for (LazyCallGraph::Node &N : C) {
Function &F = N.getFunction();
- if (F.hasFnAttribute(Attribute::OptimizeNone) ||
- F.hasFnAttribute(Attribute::Naked)) {
+ if (F.hasOptNone() || F.hasFnAttribute(Attribute::Naked)) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
HasUnknownCall = true;
@@ -1441,8 +1491,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
bool ExternalNode = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone) ||
- F->hasFnAttribute(Attribute::Naked)) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
// External node or function we're trying not to optimize - we both avoid
      // transforming them and avoid leveraging information they provide.
ExternalNode = true;
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 1223a23512ed..62c7fbd07223 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -1,9 +1,8 @@
//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -778,9 +777,7 @@ void llvm::computeDeadSymbols(
if (!VI)
return;
- // We need to make sure all variants of the symbol are scanned, alias can
- // make one (but not all) alive.
- if (llvm::all_of(VI.getSummaryList(),
+ if (llvm::any_of(VI.getSummaryList(),
[](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
return S->isLive();
}))
@@ -820,12 +817,18 @@ void llvm::computeDeadSymbols(
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
for (auto &Summary : VI.getSummaryList()) {
- GlobalValueSummary *Base = Summary->getBaseObject();
- // Set base value live in case it is an alias.
- Base->setLive(true);
- for (auto Ref : Base->refs())
+ if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+ // If this is an alias, visit the aliasee VI to ensure that all copies
+ // are marked live and it is added to the worklist for further
+ // processing of its references.
+ visit(AS->getAliaseeVI());
+ continue;
+ }
+
+ Summary->setLive(true);
+ for (auto Ref : Summary->refs())
visit(Ref);
- if (auto *FS = dyn_cast<FunctionSummary>(Base))
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
for (auto Call : FS->calls())
visit(Call.first);
}
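
The worklist loop above is a conventional mark phase over the summary graph: pop a value, mark its summaries live, and enqueue its references and calls, with aliases redirected to their aliasees. A simplified, self-contained sketch of the same pattern using placeholder types rather than the real index classes:

    // Hedged sketch of the mark phase. Node stands in for ValueInfo and
    // Edges for the union of refs() and calls().
    #include <vector>

    struct Node {
      std::vector<Node *> Edges;
      bool Live = false;
    };

    static void markLive(const std::vector<Node *> &Roots) {
      std::vector<Node *> Worklist(Roots.begin(), Roots.end());
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (N->Live)
          continue; // Already visited.
        N->Live = true;
        for (Node *Succ : N->Edges)
          Worklist.push_back(Succ);
      }
    }
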
@@ -847,14 +850,16 @@ void llvm::computeDeadSymbolsWithConstProp(
bool ImportEnabled) {
computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
if (ImportEnabled) {
- Index.propagateConstants(GUIDPreservedSymbols);
+ Index.propagateAttributes(GUIDPreservedSymbols);
} else {
- // If import is disabled we should drop read-only attribute
+ // If import is disabled we should drop read/write-only attribute
// from all summaries to prevent internalization.
for (auto &P : Index)
for (auto &S : P.second.SummaryList)
- if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get())) {
GVS->setReadOnly(false);
+ GVS->setWriteOnly(false);
+ }
}
}
@@ -973,12 +978,15 @@ void llvm::thinLTOResolvePrevailingInModule(
// changed to enable this for aliases.
llvm_unreachable("Expected GV to be converted");
} else {
- // If the original symbols has global unnamed addr and linkonce_odr linkage,
- // it should be an auto hide symbol. Add hidden visibility to the symbol to
- // preserve the property.
- if (GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr() &&
- NewLinkage == GlobalValue::WeakODRLinkage)
+ // If all copies of the original symbol had global unnamed addr and
+ // linkonce_odr linkage, it should be an auto hide symbol. In that case
+ // the thin link would have marked it as CanAutoHide. Add hidden visibility
+ // to the symbol to preserve the property.
+ if (NewLinkage == GlobalValue::WeakODRLinkage &&
+ GS->second->canAutoHide()) {
+ assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr());
GV.setVisibility(GlobalValue::HiddenVisibility);
+ }
LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
<< "` from " << GV.getLinkage() << " to " << NewLinkage
@@ -1047,9 +1055,10 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
ValueToValueMapTy VMap;
Function *NewFn = CloneFunction(Fn, VMap);
- // Clone should use the original alias's linkage and name, and we ensure
- // all uses of alias instead use the new clone (casted if necessary).
+ // Clone should use the original alias's linkage, visibility and name, and we
+ // ensure all uses of alias instead use the new clone (casted if necessary).
NewFn->setLinkage(GA->getLinkage());
+ NewFn->setVisibility(GA->getVisibility());
GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
NewFn->takeName(GA);
return NewFn;
@@ -1057,7 +1066,7 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
// Internalize values that we marked with specific attribute
// in processGlobalForThinLTO.
-static void internalizeImmutableGVs(Module &M) {
+static void internalizeGVsAfterImport(Module &M) {
for (auto &GV : M.globals())
// Skip GVs which have been converted to declarations
// by dropDeadSymbols.
@@ -1190,7 +1199,7 @@ Expected<bool> FunctionImporter::importFunctions(
NumImportedModules++;
}
- internalizeImmutableGVs(DestModule);
+ internalizeGVsAfterImport(DestModule);
NumImportedFunctions += (ImportedCount - ImportedGVCount);
NumImportedGlobalVars += ImportedGVCount;
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 34de87433367..86b7f3e49ee6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -1,9 +1,8 @@
//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3005aafd06b1..c4fb3ce77f6e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1,9 +1,8 @@
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -730,7 +729,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(
- GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+ GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
+ NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -906,9 +906,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
+ InitBool->getName() + ".val", false, 0,
LI->getOrdering(), LI->getSyncScopeID(),
- LI->isUnordered() ? (Instruction*)ICI : LI);
+ LI->isUnordered() ? (Instruction *)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
@@ -1041,7 +1042,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
}
// Insert a load from the global, and use it instead of the malloc.
- Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ Value *NL =
+ new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
}
}
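
Most of the LoadInst churn in this file follows one recipe: the constructors now take the loaded type as the first argument, which for a global is GV->getValueType(). A minimal sketch of the updated form, with the insertion point left as a parameter:

    // Hedged sketch: loading from a global with the explicit-type
    // LoadInst constructor (the pointee-only form is being phased out).
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static LoadInst *loadGlobal(GlobalVariable *GV, Instruction *InsertPt) {
      return new LoadInst(GV->getValueType(), GV, GV->getName() + ".val",
                          InsertPt);
    }
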
@@ -1164,10 +1166,10 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
// This is a scalarized version of the load from the global. Just create
// a new Load of the scalarized global.
- Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
- InsertedScalarizedValues,
- PHIsToRewrite),
- LI->getName()+".f"+Twine(FieldNo), LI);
+ Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+ Result = new LoadInst(V->getType()->getPointerElementType(), V,
+ LI->getName() + ".f" + Twine(FieldNo), LI);
} else {
PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
@@ -1357,7 +1359,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *GVVal =
+ new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
+ FieldGlobals[i], "tmp", NullPtrBlock);
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()));
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1650,6 +1654,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
for(auto *GVe : GVs){
DIGlobalVariable *DGV = GVe->getVariable();
DIExpression *E = GVe->getExpression();
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ unsigned SizeInOctets =
+ DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
// It is expected that the address of global optimized variable is on
// top of the stack. After optimization, value of that variable will
@@ -1660,10 +1667,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// DW_OP_deref DW_OP_constu <ValMinus>
// DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
SmallVector<uint64_t, 12> Ops = {
- dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus,
- dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
+ dwarf::DW_OP_deref_size, SizeInOctets,
+ dwarf::DW_OP_constu, ValMinus,
+ dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
dwarf::DW_OP_plus};
- E = DIExpression::prependOpcodes(E, Ops, DIExpression::WithStackValue);
+ bool WithStackValue = true;
+ E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
DIGlobalVariableExpression *DGVE =
DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
NewGV->addDebugInfo(DGVE);
@@ -1701,7 +1710,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
- StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
+ LI->getName() + ".b", false, 0,
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1717,8 +1727,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSyncScopeID(), LI);
+ LoadInst *NLI =
+ new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
+ false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1970,7 +1981,12 @@ static bool processInternalGlobal(
}
if (GS.StoredType <= GlobalStatus::InitializerStored) {
LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
- GV->setConstant(true);
+
+ // Don't actually mark a global constant if it's atomic because atomic loads
+ // are implemented by a trivial cmpxchg in some edge-cases and that usually
+ // requires write access to the variable even if it's not actually changed.
+ if (GS.Ordering == AtomicOrdering::NotAtomic)
+ GV->setConstant(true);
// Clean up any obviously simplifiable users now.
CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
@@ -2084,21 +2100,21 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
- // There can be at most one attribute set with a nest attribute.
- unsigned NestIndex;
- if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
- return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
+static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
+ Attribute::AttrKind A) {
+ unsigned AttrIndex;
+ if (Attrs.hasAttrSomewhere(A, &AttrIndex))
+ return Attrs.removeAttribute(C, AttrIndex, A);
return Attrs;
}
-static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
+ F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallSite CS(cast<Instruction>(U));
- CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+ CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
}
}
@@ -2113,13 +2129,6 @@ static bool hasChangeableCC(Function *F) {
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
- // Don't break the invariant that the inalloca parameter is the only parameter
- // passed in memory.
- // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
- // alloca it uses to the entry block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
- return false;
-
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
@@ -2281,6 +2290,17 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
if (!F->hasLocalLinkage())
continue;
+ // If we have an inalloca parameter that we can safely remove the
+ // inalloca attribute from, do so. This unlocks optimizations that
+ // wouldn't be safe in the presence of inalloca.
+ // FIXME: We should also hoist alloca affected by this to the entry
+ // block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F->hasAddressTaken()) {
+ RemoveAttribute(F, Attribute::InAlloca);
+ Changed = true;
+ }
+
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F);
@@ -2289,8 +2309,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
- (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
- TTI.useColdCCForColdCall(*F))) {
+ (TTI.useColdCCForColdCall(*F) &&
+ isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
F->setCallingConv(CallingConv::Cold);
changeCallSitesToColdCC(F);
Changed = true;
@@ -2313,7 +2333,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveNestAttribute(F);
+ RemoveAttribute(F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
@@ -2808,46 +2828,20 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
-/// the code so we only look for a function with a single basic block, where
-/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
-/// other side-effect free instructions.
-static bool cxxDtorIsEmpty(const Function &Fn,
- SmallPtrSet<const Function *, 8> &CalledFunctions) {
+/// the code so we simply check for 'ret'.
+static bool cxxDtorIsEmpty(const Function &Fn) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
// nounwind, but that doesn't seem worth doing.
if (Fn.isDeclaration())
return false;
- if (++Fn.begin() != Fn.end())
- return false;
-
- const BasicBlock &EntryBlock = Fn.getEntryBlock();
- for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
- I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore debug intrinsics.
- if (isa<DbgInfoIntrinsic>(CI))
- continue;
-
- const Function *CalledFn = CI->getCalledFunction();
-
- if (!CalledFn)
- return false;
-
- SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
-
- // Don't treat recursive functions as empty.
- if (!NewCalledFunctions.insert(CalledFn).second)
- return false;
-
- if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
- return false;
- } else if (isa<ReturnInst>(*I))
- return true; // We're done.
- else if (I->mayHaveSideEffects())
- return false; // Destructor with side effects, bail.
+ for (auto &I : Fn.getEntryBlock()) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (isa<ReturnInst>(I))
+ return true;
+ break;
}
-
return false;
}
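
Concretely, the relaxed check accepts any destructor whose entry block is only debug intrinsics followed by a ret, which is what a trivial user-provided destructor usually lowers to after earlier passes have run. A hypothetical source-level example of a destructor that would qualify:

    // Hedged example: a user-provided but effectively empty destructor. For
    // the global below, the frontend registers ~Widget via __cxa_atexit;
    // after simplification its body is a single ret, so the transform above
    // can drop the registration call.
    struct Widget {
      int X = 0;
      ~Widget() {} // No side effects; lowers to an entry block ending in ret.
    };

    Widget GlobalWidget;
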
@@ -2879,11 +2873,7 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
Function *DtorFn =
dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
- if (!DtorFn)
- continue;
-
- SmallPtrSet<const Function *, 8> CalledFunctions;
- if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
continue;
// Just remove the call.
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index 792f4b3052a3..060043a40b89 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -1,9 +1,8 @@
//===- GlobalSplit.cpp - global variable splitter -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index 924a7d5fbd9c..ab1a9a79cad6 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -1,16 +1,28 @@
//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Outline cold regions to a separate function.
-// TODO: Update BFI and BPI
-// TODO: Add all the outlined functions to a separate section.
-//
+///
+/// \file
+/// The goal of hot/cold splitting is to improve the memory locality of code.
+/// The splitting pass does this by identifying cold blocks and moving them into
+/// separate functions.
+///
+/// When the splitting pass finds a cold block (referred to as "the sink"), it
+/// grows a maximal cold region around that block. The maximal region contains
+/// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as
+/// cold as the sink. Once a region is found, it's split out of the original
+/// function provided it's profitable to do so.
+///
+/// [*] In practice, there is some added complexity because some blocks are not
+/// safe to extract.
+///
+/// TODO: Use the PM to get domtrees, and preserve BFI/BPI.
+/// TODO: Reorder outlined functions.
+///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/PostOrderIterator.h"
@@ -53,7 +65,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -69,16 +80,12 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
static cl::opt<int>
- MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
- cl::desc("Code size threshold for outlining within a "
- "single BB (as a multiple of TCC_Basic)"));
+ SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Base penalty for splitting cold code (as a "
+ "multiple of TCC_Basic)"));
namespace {
-struct PostDomTree : PostDomTreeBase<BasicBlock> {
- PostDomTree(Function &F) { recalculate(F); }
-};
-
/// A sequence of basic blocks.
///
/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
@@ -101,13 +108,14 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
bool unlikelyExecuted(BasicBlock &BB) {
// Exception handling blocks are unlikely executed.
- if (BB.isEHPad())
+ if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
return true;
- // The block is cold if it calls/invokes a cold function.
+ // The block is cold if it calls/invokes a cold function. However, do not
+ // mark sanitizer traps as cold.
for (Instruction &I : BB)
if (auto CS = CallSite(&I))
- if (CS.hasFnAttr(Attribute::Cold))
+ if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
return true;
// The block is cold if it has an unreachable terminator, unless it's
@@ -125,38 +133,39 @@ bool unlikelyExecuted(BasicBlock &BB) {
/// Check whether it's safe to outline \p BB.
static bool mayExtractBlock(const BasicBlock &BB) {
- return !BB.hasAddressTaken() && !BB.isEHPad();
-}
-
-/// Check whether \p Region is profitable to outline.
-static bool isProfitableToOutline(const BlockSequence &Region,
- TargetTransformInfo &TTI) {
- if (Region.size() > 1)
- return true;
-
- int Cost = 0;
- const BasicBlock &BB = *Region[0];
- for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
- continue;
-
- Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
-
- if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
- return true;
- }
- return false;
+ // EH pads are unsafe to outline because doing so breaks EH type tables. It
+ // follows that invoke instructions cannot be extracted, because CodeExtractor
+ // requires unwind destinations to be within the extraction region.
+ //
+ // Resumes that are not reachable from a cleanup landing pad are considered to
+  // be unreachable. It's not safe to split them out either.
+ auto Term = BB.getTerminator();
+ return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) &&
+ !isa<ResumeInst>(Term);
}
-/// Mark \p F cold. Return true if it's changed.
-static bool markEntireFunctionCold(Function &F) {
- assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
+/// If \p UpdateEntryCount is true (set when this is a new split function and
+/// module has profile data), set entry count to 0 so it is treated as cold.
+/// Return true if the function is changed.
+static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
+ assert(!F.hasOptNone() && "Can't mark this cold");
bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::Cold)) {
+ F.addFnAttr(Attribute::Cold);
+ Changed = true;
+ }
if (!F.hasFnAttribute(Attribute::MinSize)) {
F.addFnAttr(Attribute::MinSize);
Changed = true;
}
- // TODO: Move this function into a cold section.
+ if (UpdateEntryCount) {
+ // Set the entry count to 0 to ensure it is placed in the unlikely text
+ // section when function sections are enabled.
+ F.setEntryCount(0);
+ Changed = true;
+ }
+
return Changed;
}
@@ -165,24 +174,24 @@ public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,
function_ref<TargetTransformInfo &(Function &)> GTTI,
- std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
- : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+ function_ref<AssumptionCache *(Function &)> LAC)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
bool run(Module &M);
private:
+ bool isFunctionCold(const Function &F) const;
bool shouldOutlineFrom(const Function &F) const;
- bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE);
+ bool outlineColdRegions(Function &F, bool HasProfileSummary);
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE, unsigned Count);
- SmallPtrSet<const Function *, 2> OutlinedFunctions;
+ OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC, unsigned Count);
ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
};
class HotColdSplittingLegacyPass : public ModulePass {
@@ -193,10 +202,10 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
bool runOnModule(Module &M) override;
@@ -204,59 +213,141 @@ public:
} // end anonymous namespace
-// Returns false if the function should not be considered for hot-cold split
-// optimization.
-bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
- // Do not try to outline again from an already outlined cold function.
- if (OutlinedFunctions.count(&F))
- return false;
+/// Check whether \p F is inherently cold.
+bool HotColdSplitting::isFunctionCold(const Function &F) const {
+ if (F.hasFnAttribute(Attribute::Cold))
+ return true;
- if (F.size() <= 2)
- return false;
+ if (F.getCallingConv() == CallingConv::Cold)
+ return true;
- // TODO: Consider only skipping functions marked `optnone` or `cold`.
+ if (PSI->isFunctionEntryCold(&F))
+ return true;
- if (F.hasAddressTaken())
- return false;
+ return false;
+}
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;
if (F.hasFnAttribute(Attribute::NoInline))
return false;
- if (F.getCallingConv() == CallingConv::Cold)
+ if (F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeThread) ||
+ F.hasFnAttribute(Attribute::SanitizeMemory))
return false;
- if (PSI->isFunctionEntryCold(&F))
- return false;
return true;
}
+/// Get the benefit score of outlining \p Region.
+static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
+ // Sum up the code size costs of non-terminator instructions. Tight coupling
+ // with \ref getOutliningPenalty is needed to model the costs of terminators.
+ int Benefit = 0;
+ for (BasicBlock *BB : Region)
+ for (Instruction &I : BB->instructionsWithoutDebug())
+ if (&I != BB->getTerminator())
+ Benefit +=
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ return Benefit;
+}
+
+/// Get the penalty score for outlining \p Region.
+static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
+ unsigned NumInputs, unsigned NumOutputs) {
+ int Penalty = SplittingThreshold;
+ LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
+
+ // If the splitting threshold is set at or below zero, skip the usual
+ // profitability check.
+ if (SplittingThreshold <= 0)
+ return Penalty;
+
+ // The typical code size cost for materializing an argument for the outlined
+ // call.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
+ const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
+ Penalty += CostForArgMaterialization * NumInputs;
+
+ // The typical code size cost for an output alloca, its associated store, and
+ // its associated reload.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputs;
+
+ // Find the number of distinct exit blocks for the region. Use a conservative
+ // check to determine whether control returns from the region.
+ bool NoBlocksReturn = true;
+ SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
+ for (BasicBlock *BB : Region) {
+ // If a block has no successors, only assume it does not return if it's
+ // unreachable.
+ if (succ_empty(BB)) {
+ NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
+ continue;
+ }
+
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (find(Region, SuccBB) == Region.end()) {
+ NoBlocksReturn = false;
+ SuccsOutsideRegion.insert(SuccBB);
+ }
+ }
+ }
+
+ // Apply a `noreturn` bonus.
+ if (NoBlocksReturn) {
+ LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
+ << " non-returning terminators\n");
+ Penalty -= Region.size();
+ }
+
+ // Apply a penalty for having more than one successor outside of the region.
+ // This penalty accounts for the switch needed in the caller.
+ if (!SuccsOutsideRegion.empty()) {
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
+ << " non-region successors\n");
+ Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
+ }
+
+ return Penalty;
+}
+
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
DominatorTree &DT,
BlockFrequencyInfo *BFI,
TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC,
unsigned Count) {
assert(!Region.empty());
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
- /* BPI */ nullptr, /* AllowVarArgs */ false,
+ /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
/* AllowAlloca */ false,
/* Suffix */ "cold." + std::to_string(Count));
+ // Perform a simple cost/benefit analysis to decide whether or not to permit
+ // splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
-
- // Do not extract regions that have live exit variables.
- if (Outputs.size() > 0) {
- LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+ int OutliningBenefit = getOutliningBenefit(Region, TTI);
+ int OutliningPenalty =
+ getOutliningPenalty(Region, Inputs.size(), Outputs.size());
+ LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
+ << ", penalty = " << OutliningPenalty << "\n");
+ if (OutliningBenefit <= OutliningPenalty)
return nullptr;
- }
- // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();
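
To make the cost model concrete, here is a worked example under the default -hotcoldsplit-threshold of 2, for a made-up region of four non-returning blocks with three inputs, one output, and a summed code-size cost of 10. The sketch only re-derives the comparison with plain arithmetic; the constants mirror getOutliningBenefit and getOutliningPenalty above:

    // Hedged, standalone re-derivation of the profitability check.
    // TCC_Basic is 1 in TargetTransformInfo; the region shape is made up:
    // 4 cold blocks that all end in unreachable, 3 inputs, 1 output, and a
    // non-terminator code-size cost of 10.
    #include <cstdio>

    int main() {
      const int TCCBasic = 1;
      int Benefit = 10 * TCCBasic;  // sum of per-instruction code-size costs
      int Penalty = 2;              // -hotcoldsplit-threshold default
      Penalty += 3 * TCCBasic;      // materializing 3 call arguments
      Penalty += 3 * TCCBasic * 1;  // 1 output: alloca + store + reload
      Penalty -= 4;                 // noreturn bonus: 4 non-returning blocks
      // No successors outside the region, so no extra exit-block penalty.
      std::printf("benefit=%d penalty=%d -> %s\n", Benefit, Penalty,
                  Benefit > Penalty ? "outline" : "keep inline");
      return 0;
    }
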
@@ -269,9 +360,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
}
CI->setIsNoInline();
- // Try to make the outlined code as small as possible on the assumption
- // that it's cold.
- markEntireFunctionCold(*OutF);
+ markFunctionCold(*OutF, BFI != nullptr);
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
ORE.emit([&]() {
@@ -298,6 +387,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
namespace {
/// A maximal outlining region. This contains all blocks post-dominated by a
/// sink block, the sink block itself, and all blocks dominated by the sink.
+/// If sink-predecessors and sink-successors cannot be extracted in one region,
+/// the static constructor returns a list of suitable extraction regions.
class OutliningRegion {
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
/// viable sub-region entry point. Blocks with higher scores are better entry
@@ -312,12 +403,9 @@ class OutliningRegion {
/// Whether the entire function is cold.
bool EntireFunctionCold = false;
- /// Whether or not \p BB could be the entry point of an extracted region.
- static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
-
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
- return isViableEntryPoint(BB) ? Score : 0;
+ return mayExtractBlock(BB) ? Score : 0;
}
/// These scores should be lower than the score for predecessor blocks,
@@ -333,21 +421,23 @@ public:
OutliningRegion(OutliningRegion &&) = default;
OutliningRegion &operator=(OutliningRegion &&) = default;
- static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
- const PostDomTree &PDT) {
- OutliningRegion ColdRegion;
-
+ static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ std::vector<OutliningRegion> Regions;
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+ Regions.emplace_back();
+ OutliningRegion *ColdRegion = &Regions.back();
+
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
RegionBlocks.insert(BB);
- ColdRegion.Blocks.emplace_back(BB, Score);
- assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ ColdRegion->Blocks.emplace_back(BB, Score);
};
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
- ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
unsigned BestScore = SinkScore;
// Visit SinkBB's ancestors using inverse DFS.
@@ -360,8 +450,8 @@ public:
// If the predecessor is cold and has no predecessors, the entire
// function must be cold.
if (SinkPostDom && pred_empty(&PredBB)) {
- ColdRegion.EntireFunctionCold = true;
- return ColdRegion;
+ ColdRegion->EntireFunctionCold = true;
+ return Regions;
}
// If SinkBB does not post-dominate a predecessor, do not mark the
@@ -376,7 +466,7 @@ public:
// considered as entry points before the sink block.
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
if (PredScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &PredBB;
+ ColdRegion->SuggestedEntryPoint = &PredBB;
BestScore = PredScore;
}
@@ -384,9 +474,19 @@ public:
++PredIt;
}
- // Add SinkBB to the cold region. It's considered as an entry point before
- // any sink-successor blocks.
- addBlockToRegion(&SinkBB, SinkScore);
+ // If the sink can be added to the cold region, do so. It's considered as
+ // an entry point before any sink-successor blocks.
+ //
+ // Otherwise, split cold sink-successor blocks using a separate region.
+ // This satisfies the requirement that all extraction blocks other than the
+ // first have predecessors within the extraction region.
+ if (mayExtractBlock(SinkBB)) {
+ addBlockToRegion(&SinkBB, SinkScore);
+ } else {
+ Regions.emplace_back();
+ ColdRegion = &Regions.back();
+ BestScore = 0;
+ }
// Find all successors of SinkBB dominated by SinkBB using DFS.
auto SuccIt = ++df_begin(&SinkBB);
@@ -407,7 +507,7 @@ public:
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
if (SuccScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &SuccBB;
+ ColdRegion->SuggestedEntryPoint = &SuccBB;
BestScore = SuccScore;
}
@@ -415,7 +515,7 @@ public:
++SuccIt;
}
- return ColdRegion;
+ return Regions;
}
/// Whether this region has nothing to extract.
@@ -461,11 +561,7 @@ public:
};
} // namespace
-bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE) {
+bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
bool Changed = false;
// The set of cold blocks.
@@ -479,17 +575,28 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
// the first region to contain a block.
ReversePostOrderTraversal<Function *> RPOT(&F);
+ // Calculate domtrees lazily. This reduces compile-time significantly.
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<PostDominatorTree> PDT;
+
+ // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This
+ // reduces compile-time significantly. TODO: When we *do* use BFI, we should
+ // be able to salvage its domtrees instead of recomputing them.
+ BlockFrequencyInfo *BFI = nullptr;
+ if (HasProfileSummary)
+ BFI = GetBFI(F);
+
+ TargetTransformInfo &TTI = GetTTI(F);
+ OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+ AssumptionCache *AC = LookupAC(F);
+
// Find all cold regions.
for (BasicBlock *BB : RPOT) {
- // Skip blocks which can't be outlined.
- if (!mayExtractBlock(*BB))
- continue;
-
// This block is already part of some outlining region.
if (ColdBlocks.count(BB))
continue;
- bool Cold = PSI.isColdBlock(BB, BFI) ||
+ bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
(EnableStaticAnalyis && unlikelyExecuted(*BB));
if (!Cold)
continue;
@@ -499,28 +606,35 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
BB->dump();
});
- auto Region = OutliningRegion::create(*BB, DT, PDT);
- if (Region.empty())
- continue;
+ if (!DT)
+ DT = make_unique<DominatorTree>(F);
+ if (!PDT)
+ PDT = make_unique<PostDominatorTree>(F);
- if (Region.isEntireFunctionCold()) {
- LLVM_DEBUG(dbgs() << "Entire function is cold\n");
- return markEntireFunctionCold(F);
- }
+ auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
+ for (OutliningRegion &Region : Regions) {
+ if (Region.empty())
+ continue;
- // If this outlining region intersects with another, drop the new region.
- //
- // TODO: It's theoretically possible to outline more by only keeping the
- // largest region which contains a block, but the extra bookkeeping to do
- // this is tricky/expensive.
- bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
- return !ColdBlocks.insert(Block.first).second;
- });
- if (RegionsOverlap)
- continue;
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
- OutliningWorklist.emplace_back(std::move(Region));
- ++NumColdRegionsFound;
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
}
// Outline single-entry cold regions, splitting up larger regions as needed.
@@ -529,26 +643,17 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
OutliningRegion Region = OutliningWorklist.pop_back_val();
assert(!Region.empty() && "Empty outlining region in worklist");
do {
- BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
- if (!isProfitableToOutline(SubRegion, TTI)) {
- LLVM_DEBUG({
- dbgs() << "Skipping outlining; not profitable to outline\n";
- SubRegion[0]->dump();
- });
- continue;
- }
-
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
LLVM_DEBUG({
dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
for (BasicBlock *BB : SubRegion)
BB->dump();
});
- Function *Outlined =
- extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
+ OutlinedFunctionID);
if (Outlined) {
++OutlinedFunctionID;
- OutlinedFunctions.insert(Outlined);
Changed = true;
}
} while (!Region.empty());
@@ -559,20 +664,31 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
bool HotColdSplitting::run(Module &M) {
bool Changed = false;
- OutlinedFunctions.clear();
- for (auto &F : M) {
+ bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr);
+ for (auto It = M.begin(), End = M.end(); It != End; ++It) {
+ Function &F = *It;
+
+ // Do not touch declarations.
+ if (F.isDeclaration())
+ continue;
+
+ // Do not modify `optnone` functions.
+ if (F.hasOptNone())
+ continue;
+
+ // Detect inherently cold functions and mark them as such.
+ if (isFunctionCold(F)) {
+ Changed |= markFunctionCold(F);
+ continue;
+ }
+
if (!shouldOutlineFrom(F)) {
LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
continue;
}
+
LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
- DominatorTree DT(F);
- PostDomTree PDT(F);
- PDT.recalculate(F);
- BlockFrequencyInfo *BFI = GetBFI(F);
- TargetTransformInfo &TTI = GetTTI(F);
- OptimizationRemarkEmitter &ORE = (*GetORE)(F);
- Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+ Changed |= outlineColdRegions(F, HasProfileSummary);
}
return Changed;
}
@@ -594,17 +710,21 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
ORE.reset(new OptimizationRemarkEmitter(&F));
return *ORE.get();
};
+ auto LookupAC = [this](Function &F) -> AssumptionCache * {
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ return ACT->lookupAssumptionCache(F);
+ return nullptr;
+ };
- return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+ return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
}
PreservedAnalyses
HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&FAM](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
+ auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
};
auto GBFI = [&FAM](Function &F) {
@@ -625,7 +745,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+ if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
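
The move from getResult to getCachedResult (and from addRequired to addUsedIfAvailable in the legacy pass) is the standard way to make an analysis optional: the splitter uses an AssumptionCache when one already exists but never forces its computation. A minimal sketch of the same lookup for a hypothetical module pass:

    // Hedged sketch: fetch an AssumptionCache only if some earlier pass has
    // already computed it; otherwise return null instead of triggering the
    // analysis.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/PassManager.h"

    using namespace llvm;

    static AssumptionCache *lookupACIfCached(Function &F,
                                             FunctionAnalysisManager &FAM) {
      return FAM.getCachedResult<AssumptionAnalysis>(F);
    }
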
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 7d55ebecbf92..7dc4d9ee9e34 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -1,9 +1,8 @@
//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,32 +61,55 @@ static bool PropagateConstantsIntoArguments(Function &F) {
// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;
- // Used by a non-instruction, or not the callee of a function, do not
- // transform.
- if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
+ // If no abstract call site was created we did not understand the use, bail.
+ AbstractCallSite ACS(&U);
+ if (!ACS)
return false;
- CallSite CS(cast<Instruction>(UR));
- if (!CS.isCallee(&U))
+ // Mismatched argument count is undefined behavior. Simply bail out to avoid
+ // handling of such situations below (avoiding asserts/crashes).
+ unsigned NumActualArgs = ACS.getNumArgOperands();
+ if (F.isVarArg() ? ArgumentConstants.size() > NumActualArgs
+ : ArgumentConstants.size() != NumActualArgs)
return false;
// Check out all of the potentially constant arguments. Note that we don't
// inspect varargs here.
- CallSite::arg_iterator AI = CS.arg_begin();
Function::arg_iterator Arg = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
- ++i, ++AI, ++Arg) {
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;
- Constant *C = dyn_cast<Constant>(*AI);
+ Value *V = ACS.getCallArgOperand(i);
+ Constant *C = dyn_cast_or_null<Constant>(V);
+
+ // Mismatched argument type is undefined behavior. Simply bail out to avoid
+ // handling of such situations below (avoiding asserts/crashes).
+ if (C && Arg->getType() != C->getType())
+ return false;
+
+ // We can only propagate thread independent values through callbacks.
+ // This is different to direct/indirect call sites because for them we
+ // know the thread executing the caller and callee is the same. For
+ // callbacks this is not guaranteed, thus a thread dependent value could
+ // be different for the caller and callee, making it invalid to propagate.
+ if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+
+ ArgumentConstants[i].second = true;
+ continue;
+ }
+
if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
- } else if (*AI == &*Arg) {
+ } else if (V == &*Arg) {
// Ignore recursive calls passing argument down.
} else {
// Argument became non-constant. If all arguments are non-constant now,
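
AbstractCallSite is what lets the rewritten loop treat direct calls, invokes, and callback calls uniformly: constructed from a Use, it evaluates to false when the use is not an abstract call, and it remaps argument indices for callback callees. A hedged sketch of the construction pattern (the diagnostic output is purely illustrative):

    // Hedged sketch: walk the uses of F and inspect each abstract call site.
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void inspectCallers(Function &F) {
      for (Use &U : F.uses()) {
        AbstractCallSite ACS(&U);
        if (!ACS)
          continue; // Not a use we understand (e.g. the address escapes).
        errs() << (ACS.isCallbackCall() ? "callback" : "direct/indirect")
               << " call with " << ACS.getNumArgOperands() << " args\n";
      }
    }
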
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 973382e2b097..34db75dd8b03 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -1,9 +1,8 @@
//===-- IPO.cpp -----------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerLegacyPassPass(Registry);
+ initializeAttributorLegacyPassPass(Registry);
initializePostOrderFunctionAttrsLegacyPassPass(Registry);
initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 470f97b8ba61..7f5511e008e1 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -26,7 +25,7 @@ static bool inferAllPrototypeAttributes(Module &M,
for (Function &F : M.functions())
// We only infer things using the prototype and the name; we don't need
// definitions.
- if (F.isDeclaration() && !F.hasFnAttribute((Attribute::OptimizeNone)))
+ if (F.isDeclaration() && !F.hasOptNone())
Changed |= inferLibFuncAttributes(F, TLI);
return Changed;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 82bba1e5c93b..efb71b73cbb7 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -1,9 +1,8 @@
//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,9 +68,9 @@ public:
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
- /*GetBFI=*/None, PSI,
- RemarksEnabled ? &ORE : nullptr);
+ return llvm::getInlineCost(
+ cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache,
+ /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr);
}
bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 66a6f80f31e4..945f8affae6e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -1,9 +1,8 @@
//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -672,7 +671,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
setInlineRemark(CS, "trivially dead");
- CG[Caller]->removeCallEdgeFor(CS);
+ CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
Instr->eraseFromParent();
++NumCallsDeleted;
} else {
@@ -974,7 +973,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F.hasOptNone()) {
setInlineRemark(Calls[i].first, "optnone attribute");
continue;
}
@@ -1006,8 +1005,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
auto GetInlineCost = [&](CallSite CS) {
Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
- PSI, &ORE);
+ bool RemarksEnabled =
+ Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+ DEBUG_TYPE);
+ return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
+ CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
+ RemarksEnabled ? &ORE : nullptr);
};
  // Now process as many calls as we have within this caller in the sequence.
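
The remark-gating pattern above recurs in the InlineSimple.cpp hunk and again in the PartialInlining.cpp hunk further down: an OptimizationRemarkEmitter is handed to getInlineCost() only when missed-optimization remarks are enabled for the pass, and nullptr otherwise so remark bookkeeping is skipped. Below is a minimal sketch of that check, assuming LLVM headers are available; wantsMissedRemarks is an illustrative helper name, not something the patch adds.

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DiagnosticHandler.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

static bool wantsMissedRemarks(const llvm::Function &Callee,
                               llvm::StringRef PassName) {
  // The context installs a default DiagnosticHandler, so the pointer is
  // expected to be non-null here.
  return Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
      PassName);
}
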
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index a6542d28dfd8..2e269604e379 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -1,9 +1,8 @@
//===-- Internalize.cpp - Mark functions internal -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,11 +27,11 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <fstream>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "internalize"
@@ -73,18 +72,15 @@ private:
void LoadFile(StringRef Filename) {
// Load the APIFile...
- std::ifstream In(Filename.data());
- if (!In.good()) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(Filename);
+ if (!Buf) {
errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
- while (In) {
- std::string Symbol;
- In >> Symbol;
- if (!Symbol.empty())
- ExternalNames.insert(Symbol);
- }
+ for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
+ ExternalNames.insert(*I);
}
};
} // end anonymous namespace
@@ -114,7 +110,7 @@ bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
}
bool InternalizePass::maybeInternalize(
- GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
if (Comdat *C = GV.getComdat()) {
if (ExternalComdats.count(C))
return false;
@@ -141,7 +137,7 @@ bool InternalizePass::maybeInternalize(
// If GV is part of a comdat and is externally visible, keep track of its
// comdat so that we don't internalize any of its members.
void InternalizePass::checkComdatVisibility(
- GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
Comdat *C = GV.getComdat();
if (!C)
return;
@@ -158,7 +154,7 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
collectUsedGlobalVariables(M, Used, false);
  // Collect comdat visibility information for the module.
- std::set<const Comdat *> ExternalComdats;
+ DenseSet<const Comdat *> ExternalComdats;
if (!M.getComdatSymbolTable().empty()) {
for (Function &F : M)
checkComdatVisibility(F, ExternalComdats);
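
A minimal sketch of the MemoryBuffer + line_iterator idiom the LoadFile rewrite above switches to; loadSymbolList is an illustrative name, not part of the patch. Note the semantic shift: the old std::ifstream loop split the file on any whitespace, while line_iterator yields one entry per line.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>

static llvm::StringSet<> loadSymbolList(llvm::StringRef Filename) {
  llvm::StringSet<> Names;
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufOrErr =
      llvm::MemoryBuffer::getFile(Filename);
  if (!BufOrErr)
    return Names; // treat an unreadable file as empty, as the pass does
  // The second argument asks line_iterator to skip blank lines.
  for (llvm::line_iterator I(*BufOrErr->get(), /*SkipBlanks=*/true), E; I != E;
       ++I)
    Names.insert(*I);
  return Names;
}
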
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 733235d45a09..91c7b5f5f135 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -1,9 +1,8 @@
//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -51,6 +51,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
};
}
@@ -139,7 +140,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (ShouldExtractLoop) {
if (NumLoops == 0) return Changed;
--NumLoops;
- CodeExtractor Extractor(DT, *L);
+ AssumptionCache *AC = nullptr;
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+ CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
if (Extractor.extractCodeRegion() != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 87c65db09517..f7371284f47e 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1,9 +1,8 @@
//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -549,10 +548,10 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
}
void LowerTypeTestsModule::allocateByteArrays() {
- std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
- [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
- return BAI1.BitSize > BAI2.BitSize;
- });
+ llvm::stable_sort(ByteArrayInfos,
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
@@ -619,7 +618,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
}
Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
- Value *Byte = B.CreateLoad(ByteAddr);
+ Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
Value *ByteAndMask =
B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
@@ -1553,11 +1552,10 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
// Order the sets of indices by size. The GlobalLayoutBuilder works best
// when given small index sets first.
- std::stable_sort(
- TypeMembers.begin(), TypeMembers.end(),
- [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
- return O1.size() < O2.size();
- });
+ llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
+ const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
// Create a GlobalLayoutBuilder and provide it with index sets as layout
// fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
@@ -1693,6 +1691,14 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
}
bool LowerTypeTestsModule::lower() {
+ // If only some of the modules were split, we cannot correctly perform
+  // this transformation. We already checked for the presence of type tests
+ // with partially split modules during the thin link, and would have emitted
+ // an error if any were found, so here we can simply return.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+ return false;
+
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *ICallBranchFunnelFunc =
@@ -1702,13 +1708,6 @@ bool LowerTypeTestsModule::lower() {
!ExportSummary && !ImportSummary)
return false;
- // If only some of the modules were split, we cannot correctly handle
- // code that contains type tests.
- if (TypeTestFunc && !TypeTestFunc->use_empty() &&
- ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
- (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
- report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
-
if (ImportSummary) {
if (TypeTestFunc) {
for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
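
The allocateByteArrays and buildBitSetsFromDisjointSet hunks above switch from iterator-pair std::stable_sort to the range-based llvm::stable_sort helper from llvm/ADT/STLExtras.h, which simply forwards begin()/end() of the container. A short sketch of the same call shape, with ByteInfo as a made-up stand-in type:

#include "llvm/ADT/STLExtras.h"
#include <cstdint>
#include <vector>

struct ByteInfo {
  uint64_t BitSize;
};

static void sortLargestFirst(std::vector<ByteInfo> &Infos) {
  llvm::stable_sort(Infos, [](const ByteInfo &A, const ByteInfo &B) {
    return A.BitSize > B.BitSize; // same descending order as allocateByteArrays
  });
}

The MergeFunctions hunk below uses the same helper with less_first(), which orders the (hash, Function*) pairs by their first element.
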
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 11efe95b10d4..3a08069dcd4a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -1,9 +1,8 @@
//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -190,8 +189,6 @@ public:
void replaceBy(Function *G) const {
F = G;
}
-
- void release() { F = nullptr; }
};
/// MergeFunctions finds functions which will generate identical machine code,
@@ -281,8 +278,8 @@ private:
// Replace G with an alias to F (deleting function G)
void writeAlias(Function *F, Function *G);
- // Replace G with an alias to F if possible, or a thunk to F if
- // profitable. Returns false if neither is the case.
+ // Replace G with an alias to F if possible, or a thunk to F if possible.
+ // Returns false if neither is the case.
bool writeThunkOrAlias(Function *F, Function *G);
/// Replace function F with function G in the function tree.
@@ -383,6 +380,11 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
#endif
+/// Check whether \p F is eligible for function merging.
+static bool isEligibleForMerging(Function &F) {
+ return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
+}
+
bool MergeFunctions::runOnModule(Module &M) {
if (skipModule(M))
return false;
@@ -394,17 +396,12 @@ bool MergeFunctions::runOnModule(Module &M) {
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
HashedFuncs;
for (Function &Func : M) {
- if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
}
}
- std::stable_sort(
- HashedFuncs.begin(), HashedFuncs.end(),
- [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
- const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
- return a.first < b.first;
- });
+ llvm::stable_sort(HashedFuncs, less_first());
auto S = HashedFuncs.begin();
for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
@@ -654,12 +651,16 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(dbgs() << " }\n");
}
-// Don't merge tiny functions using a thunk, since it can just end up
-// making the function larger.
-static bool isThunkProfitable(Function * F) {
+/// Whether this function may be replaced by a forwarding thunk.
+static bool canCreateThunkFor(Function *F) {
+ if (F->isVarArg())
+ return false;
+
+ // Don't merge tiny functions using a thunk, since it can just end up
+ // making the function larger.
if (F->size() == 1) {
if (F->front().size() <= 2) {
- LLVM_DEBUG(dbgs() << "isThunkProfitable: " << F->getName()
+ LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
<< " is too small to bother creating a thunk for\n");
return false;
}
@@ -695,6 +696,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
} else {
NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
G->getAddressSpace(), "", G->getParent());
+ NewG->setComdat(G->getComdat());
BB = BasicBlock::Create(F->getContext(), "", NewG);
}
@@ -787,7 +789,7 @@ bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
writeAlias(F, G);
return true;
}
- if (isThunkProfitable(F)) {
+ if (canCreateThunkFor(F)) {
writeThunk(F, G);
return true;
}
@@ -802,9 +804,9 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// Both writeThunkOrAlias() calls below must succeed, either because we can
// create aliases for G and NewF, or because a thunk for F is profitable.
// F here has the same signature as NewF below, so that's what we check.
- if (!isThunkProfitable(F) && (!canCreateAliasFor(F) || !canCreateAliasFor(G))) {
+ if (!canCreateThunkFor(F) &&
+ (!canCreateAliasFor(F) || !canCreateAliasFor(G)))
return;
- }
// Make them both thunks to the same internal function.
Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
@@ -944,25 +946,7 @@ void MergeFunctions::remove(Function *F) {
// For each instruction used by the value, remove() the function that contains
// the instruction. This should happen right before a call to RAUW.
void MergeFunctions::removeUsers(Value *V) {
- std::vector<Value *> Worklist;
- Worklist.push_back(V);
- SmallPtrSet<Value*, 8> Visited;
- Visited.insert(V);
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
-
- for (User *U : V->users()) {
- if (Instruction *I = dyn_cast<Instruction>(U)) {
- remove(I->getFunction());
- } else if (isa<GlobalValue>(U)) {
- // do nothing
- } else if (Constant *C = dyn_cast<Constant>(U)) {
- for (User *UU : C->users()) {
- if (!Visited.insert(UU).second)
- Worklist.push_back(UU);
- }
- }
- }
- }
+ for (User *U : V->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ remove(I->getFunction());
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index da214a1d3b44..733782e8764d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -1,9 +1,8 @@
//===- PartialInlining.cpp - Inline parts of functions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -182,11 +181,11 @@ struct FunctionOutliningMultiRegionInfo {
// Container for outline regions
struct OutlineRegionInfo {
- OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
+ OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
BasicBlock *EntryBlock, BasicBlock *ExitBlock,
BasicBlock *ReturnBlock)
- : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
- ReturnBlock(ReturnBlock) {}
+ : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
+ ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
SmallVector<BasicBlock *, 8> Region;
BasicBlock *EntryBlock;
BasicBlock *ExitBlock;
@@ -200,10 +199,12 @@ struct PartialInlinerImpl {
PartialInlinerImpl(
std::function<AssumptionCache &(Function &)> *GetAC,
+ function_ref<AssumptionCache *(Function &)> LookupAC,
std::function<TargetTransformInfo &(Function &)> *GTTI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
ProfileSummaryInfo *ProfSI)
- : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+ : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
+ GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
bool run(Module &M);
// Main part of the transformation that calls helper functions to find
@@ -223,9 +224,11 @@ struct PartialInlinerImpl {
// Two constructors, one for single region outlining, the other for
// multi-region outlining.
FunctionCloner(Function *F, FunctionOutliningInfo *OI,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC);
FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC);
~FunctionCloner();
// Prepare for function outlining: making sure there is only
@@ -261,11 +264,13 @@ struct PartialInlinerImpl {
std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
OptimizationRemarkEmitter &ORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
};
private:
int NumPartialInlining = 0;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
std::function<TargetTransformInfo &(Function &)> *GetTTI;
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
ProfileSummaryInfo *PSI;
@@ -366,12 +371,17 @@ struct PartialInlinerLegacyPass : public ModulePass {
return ACT->getAssumptionCache(F);
};
+ auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
+ return ACT->lookupAssumptionCache(F);
+ };
+
std::function<TargetTransformInfo &(Function &)> GetTTI =
[&TTIWP](Function &F) -> TargetTransformInfo & {
return TTIWP->getTTI(F);
};
- return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
+ return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
+ &GetTTI, NoneType::None, PSI)
.run(M);
}
};
@@ -525,7 +535,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// assert(ReturnBlock && "ReturnBlock is NULL somehow!");
FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
- RegInfo.Region = DominateVector;
OutliningInfo->ORI.push_back(RegInfo);
#ifndef NDEBUG
if (TracePartialInlining) {
@@ -763,8 +772,13 @@ bool PartialInlinerImpl::shouldPartialInline(
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
- InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
- *GetAssumptionCache, GetBFI, PSI, &ORE);
+ bool RemarksEnabled =
+ Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+ DEBUG_TYPE);
+ assert(Call && "invalid callsite for partial inline");
+ InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
+ CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
+ RemarksEnabled ? &ORE : nullptr);
if (IC.isAlways()) {
ORE.emit([&]() {
@@ -798,7 +812,7 @@ bool PartialInlinerImpl::shouldPartialInline(
const DataLayout &DL = Caller->getParent()->getDataLayout();
// The savings of eliminating the call:
- int NonWeightedSavings = getCallsiteCost(CS, DL);
+ int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
// Weighted saving is smaller than weighted cost, return false
@@ -855,12 +869,12 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
continue;
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- InlineCost += getCallsiteCost(CallSite(CI), DL);
+ InlineCost += getCallsiteCost(*CI, DL);
continue;
}
if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- InlineCost += getCallsiteCost(CallSite(II), DL);
+ InlineCost += getCallsiteCost(*II, DL);
continue;
}
@@ -949,8 +963,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
}
PartialInlinerImpl::FunctionCloner::FunctionCloner(
- Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
- : OrigFunc(F), ORE(ORE) {
+ Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
@@ -973,8 +988,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OI,
- OptimizationRemarkEmitter &ORE)
- : OrigFunc(F), ORE(ORE) {
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
@@ -1112,7 +1128,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
- ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
+ ClonedFuncBFI.get(), &BPI,
+ LookupAC(*RegionInfo.EntryBlock->getParent()),
+ /* AllowVarargs */ false);
CE.findInputsOutputs(Inputs, Outputs, Sinks);
@@ -1194,7 +1212,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// Extract the body of the if.
Function *OutlinedFunc =
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
- ClonedFuncBFI.get(), &BPI,
+ ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
/* AllowVarargs */ true)
.extractCodeRegion();
@@ -1258,7 +1276,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
- FunctionCloner Cloner(F, OMRI.get(), ORE);
+ FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
#ifndef NDEBUG
if (TracePartialInlining) {
@@ -1291,7 +1309,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (!OI)
return {false, nullptr};
- FunctionCloner Cloner(F, OI.get(), ORE);
+ FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
Cloner.NormalizeReturnBlock();
Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
@@ -1485,6 +1503,10 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
return FAM.getResult<AssumptionAnalysis>(F);
};
+ auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
+ };
+
std::function<BlockFrequencyInfo &(Function &)> GetBFI =
[&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
@@ -1497,7 +1519,8 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
+ if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
+ {GetBFI}, PSI)
.run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
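
A small sketch on the design choice of threading LookupAssumptionCache around as an llvm::function_ref (llvm/ADT/STLExtras.h): unlike std::function it is a non-owning view of a callable, so the lambdas built in the legacy pass and in the new-PM run method must stay alive for as long as PartialInlinerImpl and FunctionCloner hold the reference. The types and names below (CacheLookup, Holder, runHolder) are simplified stand-ins, not the pass's own.

#include "llvm/ADT/STLExtras.h"

using CacheLookup = llvm::function_ref<int *(int &)>;

struct Holder {
  CacheLookup Lookup; // stores only a reference to the callable, no copy
};

static int *runHolder() {
  static int Slot = 0;
  // The lambda must outlive every call made through Holder::Lookup; here it
  // lives on runHolder's stack for the whole use, mirroring the pass structure.
  auto Lambda = [](int &X) -> int * { return &X; };
  Holder H{Lambda};
  return H.Lookup(Slot);
}
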
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9764944dc332..3ea77f08fd3c 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -1,9 +1,8 @@
//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,6 +30,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
@@ -39,9 +39,13 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
using namespace llvm;
@@ -50,14 +54,6 @@ static cl::opt<bool>
    cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
static cl::opt<bool>
- RunLoopVectorization("vectorize-loops", cl::Hidden,
- cl::desc("Run the Loop vectorization passes"));
-
-static cl::opt<bool>
-RunSLPVectorization("vectorize-slp", cl::Hidden,
- cl::desc("Run the SLP vectorization passes"));
-
-static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
@@ -73,12 +69,6 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
-static cl::opt<bool>
-RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
- cl::init(true), cl::Hidden,
- cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
- "vectorizer instead of before"));
-
// Experimental option to use CFL-AA
enum class CFLAAType { None, Steensgaard, Andersen, Both };
static cl::opt<CFLAAType>
@@ -104,23 +94,13 @@ static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
+static cl::opt<bool>
+ EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
+ cl::desc("Enable performing ThinLTO."));
+
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
cl::desc("Enable hot-cold splitting pass"));
-
-static cl::opt<bool> RunPGOInstrGen(
- "profile-generate", cl::init(false), cl::Hidden,
- cl::desc("Enable PGO instrumentation."));
-
-static cl::opt<std::string>
- PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
- cl::desc("Specify the path of profile data file."));
-
-static cl::opt<std::string> RunPGOInstrUse(
- "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
- cl::desc("Enable use phase of PGO instrumentation and specify the path "
- "of profile data file"));
-
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
@@ -134,10 +114,6 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
-static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-earlycse-memssa", cl::init(true), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
-
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass (default = off)"));
@@ -156,10 +132,21 @@ static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN sinking pass (default = off)"));
+// This option is used to simplify testing of SampleFDO optimizations for
+// profile loading.
static cl::opt<bool>
EnableCHR("enable-chr", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
+cl::opt<bool> FlattenedProfileUsed(
+ "flattened-profile-used", cl::init(false), cl::Hidden,
+ cl::desc("Indicate the sample profile being used is flattened, i.e., "
+ "no inline hierachy exists in the profile. "));
+
+cl::opt<bool> EnableOrderFileInstrumentation(
+ "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
+ cl::desc("Enable order file instrumentation (default = off)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -167,19 +154,26 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnrollLoops = false;
SLPVectorize = RunSLPVectorization;
- LoopVectorize = RunLoopVectorization;
+ LoopVectorize = EnableLoopVectorization;
+ LoopsInterleaved = EnableLoopInterleaving;
RerollLoops = RunLoopRerolling;
NewGVN = RunNewGVN;
+ LicmMssaOptCap = SetLicmMssaOptCap;
+ LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
DisableGVNLoadPRE = false;
+ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
VerifyInput = false;
VerifyOutput = false;
MergeFunctions = false;
PrepareForLTO = false;
- EnablePGOInstrGen = RunPGOInstrGen;
- PGOInstrGen = PGOOutputFile;
- PGOInstrUse = RunPGOInstrUse;
+ EnablePGOInstrGen = false;
+ EnablePGOCSInstrGen = false;
+ EnablePGOCSInstrUse = false;
+ PGOInstrGen = "";
+ PGOInstrUse = "";
+ PGOSampleUse = "";
PrepareForThinLTO = EnablePrepareForThinLTO;
- PerformThinLTO = false;
+ PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
}
@@ -272,13 +266,19 @@ void PassManagerBuilder::populateFunctionPassManager(
}
// Do PGO instrumentation generation or use pass as the option specified.
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
+void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
+ bool IsCS = false) {
+ if (IsCS) {
+ if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
+ return;
+ } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
return;
+
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
+  // We will not do this pre-inlining for context sensitive PGO (when IsCS is true).
if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
- PGOSampleUse.empty()) {
+ PGOSampleUse.empty() && !IsCS) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -296,22 +296,23 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
}
- if (EnablePGOInstrGen) {
- MPM.add(createPGOInstrumentationGenLegacyPass());
+ if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
+ MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!PGOInstrGen.empty())
Options.InstrProfileOutput = PGOInstrGen;
Options.DoCounterPromotion = true;
+ Options.UseBFIInPromotion = IsCS;
MPM.add(createLoopRotatePass());
- MPM.add(createInstrProfilingLegacyPass(Options));
+ MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
}
if (!PGOInstrUse.empty())
- MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+ MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
// Indirect call promotion that promotes intra-module targets only.
// For ThinLTO this is done earlier due to interactions with globalopt
// for imported functions. We don't run this at -O0.
- if (OptLevel > 0)
+ if (OptLevel > 0 && !IsCS)
MPM.add(
createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
@@ -320,7 +321,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
@@ -359,7 +360,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
- MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
@@ -378,8 +379,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- MPM.add(createSimpleLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
@@ -403,14 +405,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (!RunSLPAfterLoopVectorization && SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -419,15 +419,23 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
addExtensionsToPM(EP_Peephole, MPM);
if (EnableCHR && OptLevel >= 3 &&
- (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+ (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
MPM.add(createControlHeightReductionLegacyPass());
}
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
+ // is handled separately, so just check this is not the ThinLTO post-link.
+ bool DefaultOrPreLinkPipeline = !PerformThinLTO;
+
if (!PGOSampleUse.empty()) {
MPM.add(createPruneEHPass());
- MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
+ // In ThinLTO mode, when flattened profile is used, all the available
+ // profile information will be annotated in PreLink phase so there is
+ // no need to load the profile again in PostLink.
+ if (!(FlattenedProfileUsed && PerformThinLTO))
+ MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
}
// Allow forcing function attributes as a debugging and tuning aid.
@@ -508,6 +516,10 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createCalledValuePropagationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
+ MPM.add(createAttributorLegacyPass());
+
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
// Promote any localized global vars.
MPM.add(createPromoteMemoryToRegisterPass());
@@ -523,9 +535,14 @@ void PassManagerBuilder::populateModulePassManager(
// profile annotation in backend more difficult.
// PGO instrumentation is added during the compile phase for ThinLTO, do
// not run it a second time
- if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
+ if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
+ // Create profile COMDAT variables. Lld linker wants to see all variables
+ // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
+ if (!PerformThinLTO && EnablePGOCSInstrGen)
+ MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
+
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
// for the entire SCC pass run below.
@@ -567,6 +584,17 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
+ // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass
+ // for LTO and ThinLTO -- The actual pass will be called after all inlines
+ // are performed.
+ // Need to do this after COMDAT variables have been eliminated,
+ // (i.e. after EliminateAvailableExternallyPass).
+ if (!(PrepareForLTO || PrepareForThinLTO))
+ addPGOInstrPasses(MPM, /* IsCS */ true);
+
+ if (EnableOrderFileInstrumentation)
+ MPM.add(createInstrOrderFilePass());
+
MPM.add(createReversePostOrderFunctionAttrsPass());
// The inliner performs some kind of dead code elimination as it goes,
@@ -605,7 +633,7 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
- MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
// We add a fresh GlobalsModRef run at this point. This is particularly
@@ -640,7 +668,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize));
+ MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
@@ -662,7 +690,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createEarlyCSEPass());
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
@@ -675,7 +703,7 @@ void PassManagerBuilder::populateModulePassManager(
// before SLP vectorization.
MPM.add(createCFGSimplificationPass(1, true, true, false, true));
- if (RunSLPAfterLoopVectorization && SLPVectorize) {
+ if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
@@ -692,8 +720,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopUnrollAndJamPass(OptLevel));
}
- MPM.add(createLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
if (!DisableUnrollLoops) {
    // LoopUnroll may generate some redundancy to clean up.
@@ -703,7 +732,7 @@ void PassManagerBuilder::populateModulePassManager(
    // unrolled loop is an inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
MPM.add(createWarnMissedTransformationsPass());
@@ -722,6 +751,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createConstantMergePass()); // Merge dup global constants
}
+ // See comment in the new PM for justification of scheduling splitting at
+ // this stage (\ref buildModuleSimplificationPipeline).
+ if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
+ MPM.add(createHotColdSplittingPass());
+
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
@@ -738,9 +772,6 @@ void PassManagerBuilder::populateModulePassManager(
// flattening of blocks.
MPM.add(createDivRemPairsPass());
- if (EnableHotColdSplit)
- MPM.add(createHotColdSplittingPass());
-
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
MPM.add(createCFGSimplificationPass());
@@ -793,6 +824,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
PM.add(createCalledValuePropagationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
+ PM.add(createAttributorLegacyPass());
}
// Infer attributes about definitions. The readnone attribute in particular is
@@ -842,6 +876,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createPruneEHPass()); // Remove dead EH info.
+ // CSFDO instrumentation and use pass.
+ addPGOInstrPasses(PM, /* IsCS */ true);
+
// Optimize globals again if we ran the inliner.
if (RunInliner)
PM.add(createGlobalOptimizerPass());
@@ -859,11 +896,16 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Break up allocas
PM.add(createSROAPass());
- // Run a few AA driven optimizations here and now, to cleanup the code.
+ // LTO provides additional opportunities for tailcall elimination due to
+ // link-time inlining, and visibility of nocapture attribute.
+ PM.add(createTailCallEliminationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
+ // Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
- PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
@@ -878,11 +920,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- PM.add(createSimpleLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
PM.add(createWarnMissedTransformationsPass());
@@ -896,9 +940,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createBitTrackingDCEPass());
// More scalar chains could be vectorized due to more alias information
- if (RunSLPAfterLoopVectorization)
- if (SLPVectorize)
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
// After vectorization, assume intrinsics may tell us more about pointer
// alignments.
@@ -913,6 +956,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
void PassManagerBuilder::addLateLTOOptimizationPasses(
legacy::PassManagerBase &PM) {
+ // See comment in the new PM for justification of scheduling splitting at
+ // this stage (\ref buildLTODefaultPipeline).
+ if (EnableHotColdSplit)
+ PM.add(createHotColdSplittingPass());
+
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -968,6 +1016,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (VerifyInput)
PM.add(createVerifierPass());
+ addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
+
if (OptLevel != 0)
addLTOOptimizationPasses(PM);
else {
@@ -989,6 +1039,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
+ addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
+
if (VerifyOutput)
PM.add(createVerifierPass());
}
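
The PassManagerBuilder hunks above add context-sensitive PGO (CSPGO) hooks: addPGOInstrPasses gains an IsCS flag, the CS instrumentation is scheduled after inlining and after EliminateAvailableExternally, and profile COMDAT variables are created in the pre-link step. A hedged sketch of how a frontend or tool might drive the new knobs; the OptLevel and the "cs.profraw" output name are illustrative choices, not defaults from the patch.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

static void buildCSInstrumentationPipeline(llvm::legacy::PassManager &MPM) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  // Request context-sensitive instrumentation; per the hunks above it runs
  // after the inliner, not in the LTO/ThinLTO prepare pipelines.
  PMB.EnablePGOCSInstrGen = true;
  PMB.PGOInstrGen = "cs.profraw"; // illustrative raw-profile output name
  PMB.populateModulePassManager(MPM);
}
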
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index ae586c017471..cb3915dfb678 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -1,9 +1,8 @@
//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,7 +203,8 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
if (CallInst *CI = dyn_cast<CallInst>(I++))
- if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ if (CI->doesNotReturn() && !CI->isMustTailCall() &&
+ !isa<UnreachableInst>(I)) {
// This call calls a function that cannot return. Insert an
// unreachable instruction after it and simplify the code. Do this
// by splitting the BB, adding the unreachable, then deleting the
@@ -242,12 +242,12 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
break;
}
- if (auto CS = CallSite (&*I)) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&*I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
- CGN->removeCallEdgeFor(CS);
+ CGN->removeCallEdgeFor(*Call);
else if (!Callee->isIntrinsic())
- CGN->removeCallEdgeFor(CS);
+ CGN->removeCallEdgeFor(*Call);
}
if (!I->use_empty())
diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp
index d2c34abfc132..7be3608bd2ec 100644
--- a/lib/Transforms/IPO/SCCP.cpp
+++ b/lib/Transforms/IPO/SCCP.cpp
@@ -79,6 +79,7 @@ char IPSCCPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 9f123c2b875e..877d20e72ffc 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1,9 +1,8 @@
//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -319,6 +318,14 @@ protected:
/// Optimization Remark Emitter used to emit diagnostic remarks.
OptimizationRemarkEmitter *ORE = nullptr;
+
+ // Information recorded when we declined to inline a call site
+ // because we have determined it is too cold is accumulated for
+ // each callee function. Initially this is just the entry count.
+ struct NotInlinedProfileInfo {
+ uint64_t entryCount;
+ };
+ DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -745,8 +752,9 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
// when cost exceeds threshold without checking all IRs in the callee.
  // The actual cost does not matter because we only check isNever() to
// see if it is legal to inline the callsite.
- InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
- None, nullptr, nullptr);
+ InlineCost Cost =
+ getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
+ None, nullptr, nullptr);
if (Cost.isNever()) {
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
<< "incompatible inlining");
@@ -779,6 +787,8 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+
+ DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
bool LocalChanged = false;
@@ -791,6 +801,8 @@ bool SampleProfileLoader::inlineHotFunctions(
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
Candidates.push_back(&I);
+ if (FS->getEntrySamples() > 0)
+ localNotInlinedCallSites.try_emplace(&I, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
}
@@ -823,6 +835,9 @@ bool SampleProfileLoader::inlineHotFunctions(
if (CalleeFunctionName == F.getName())
continue;
+ if (!callsiteIsHot(FS, PSI))
+ continue;
+
const char *Reason = "Callee function not available";
auto R = SymbolMap.find(CalleeFunctionName);
if (R != SymbolMap.end() && R->getValue() &&
@@ -836,8 +851,10 @@ bool SampleProfileLoader::inlineHotFunctions(
PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(DI))
+ inlineCallInstruction(DI)) {
+ localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ }
} else {
LLVM_DEBUG(dbgs()
<< "\nFailed to promote indirect call to "
@@ -846,8 +863,10 @@ bool SampleProfileLoader::inlineHotFunctions(
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(I))
+ if (inlineCallInstruction(I)) {
+ localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ }
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
@@ -859,6 +878,18 @@ bool SampleProfileLoader::inlineHotFunctions(
break;
}
}
+
+  // Accumulate not-inlined call site information into notInlinedCallInfo.
+ for (const auto &Pair : localNotInlinedCallSites) {
+ Instruction *I = Pair.getFirst();
+ Function *Callee = CallSite(I).getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ const FunctionSamples *FS = Pair.getSecond();
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
return Changed;
}
@@ -1299,10 +1330,10 @@ void SampleProfileLoader::propagateWeights(Function &F) {
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
SortedCallTargets, Sum, IPVK_IndirectCallTarget,
SortedCallTargets.size());
- } else if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ } else if (!isa<IntrinsicInst>(&I)) {
+ I.setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(
+ {static_cast<uint32_t>(BlockWeights[BB])}));
}
}
}
@@ -1568,8 +1599,9 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
return false;
PSI = _PSI;
- if (M.getProfileSummary() == nullptr)
- M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+ if (M.getProfileSummary(/* IsCS */ false) == nullptr)
+ M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
+ ProfileSummary::PSK_Sample);
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
@@ -1601,6 +1633,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
clearFunctionData();
retval |= runOnFunction(F, AM);
}
+
+  // Account for cold calls that were not inlined.
+ for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
+ notInlinedCallInfo)
+ updateProfileCallee(pair.first, pair.second.entryCount);
+
return retval;
}
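
A short sketch of the accumulation idiom used by notInlinedCallInfo above: DenseMap::try_emplace inserts a zero-initialized record the first time a callee is seen and is a no-op afterwards, so the entry count can be bumped unconditionally on the returned iterator. The key and value types here are simplified stand-ins, not the pass's types.

#include "llvm/ADT/DenseMap.h"
#include <cstdint>

struct NotInlinedInfo {
  uint64_t EntryCount = 0;
};

static void addNotInlinedSamples(llvm::DenseMap<unsigned, NotInlinedInfo> &Info,
                                 unsigned CalleeId, uint64_t Samples) {
  auto Inserted = Info.try_emplace(CalleeId, NotInlinedInfo{});
  Inserted.first->second.EntryCount += Samples; // new and existing entries alike
}
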
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 3c3c5dd19d1f..106db3c8bd9d 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -1,9 +1,8 @@
//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index c9afb060a91a..67a473612fc1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -1,9 +1,8 @@
//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index ba4efb3ff60d..45fd432fd721 100644
--- a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -1,9 +1,8 @@
//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 510ecb516dc2..24c476376c14 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -1,9 +1,8 @@
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -418,34 +417,53 @@ void splitAndWriteThinLTOBitcode(
}
}
-// Returns whether this module needs to be split because splitting is
-// enabled and it uses type metadata.
-bool requiresSplit(Module &M) {
- // First check if the LTO Unit splitting has been enabled.
+// Check if the LTO Unit splitting has been enabled.
+bool enableSplitLTOUnit(Module &M) {
bool EnableSplitLTOUnit = false;
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("EnableSplitLTOUnit")))
EnableSplitLTOUnit = MD->getZExtValue();
- if (!EnableSplitLTOUnit)
- return false;
+ return EnableSplitLTOUnit;
+}
- // Module only needs to be split if it contains type metadata.
+// Returns whether this module needs to be split because it uses type metadata.
+bool hasTypeMetadata(Module &M) {
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
}
-
return false;
}
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
- // Split module if splitting is enabled and it contains any type metadata.
- if (requiresSplit(M))
- return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
+ // See if this module has any type metadata. If so, we try to split it
+ // or at least promote type ids to enable WPD.
+ if (hasTypeMetadata(M)) {
+ if (enableSplitLTOUnit(M))
+ return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ // Promote type ids as needed for index-based WPD.
+ std::string ModuleId = getUniqueModuleId(&M);
+ if (!ModuleId.empty()) {
+ promoteTypeIds(M, ModuleId);
+ // Need to rebuild the index so that it contains type metadata
+ // for the newly promoted type ids.
+ // FIXME: Probably should not bother building the index at all
+ // in the caller of writeThinLTOBitcode (which does so via the
+ // ModuleSummaryIndexAnalysis pass), since we have to rebuild it
+ // anyway whenever there is type metadata (here or in
+ // splitAndWriteThinLTOBitcode). Just always build it once via the
+ // buildModuleSummaryIndex when Module(s) are ready.
+ ProfileSummaryInfo PSI(M);
+ NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+ buildModuleSummaryIndex(M, nullptr, &PSI));
+ Index = NewIndex.get();
+ }
+ }
- // Otherwise we can just write it out as a regular module.
+ // Write it out as an unsplit ThinLTO module.
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 48bd0cda759d..6b6dd6194e17 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1,9 +1,8 @@
//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -882,7 +881,7 @@ void DevirtModule::tryICallBranchFunnel(
}
BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
- Constant *Intr =
+ Function *Intr =
Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
@@ -921,9 +920,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
NewArgs.push_back(Int8PtrTy);
for (Type *T : CS.getFunctionType()->params())
NewArgs.push_back(T);
- PointerType *NewFT = PointerType::getUnqual(
+ FunctionType *NewFT =
FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
- CS.getFunctionType()->isVarArg()));
+ CS.getFunctionType()->isVarArg());
+ PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
IRBuilder<> IRB(CS.getInstruction());
std::vector<Value *> Args;
@@ -933,10 +933,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
CallSite NewCS;
if (CS.isCall())
- NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+ NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
else
NewCS = IRB.CreateInvoke(
- IRB.CreateBitCast(JT, NewFT),
+ NewFT, IRB.CreateBitCast(JT, NewFTPtr),
cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
NewCS.setCallingConv(CS.getCallingConv());
@@ -1183,7 +1183,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Value *Addr =
B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
if (RetType->getBitWidth() == 1) {
- Value *Bits = B.CreateLoad(Addr);
+ Value *Bits = B.CreateLoad(Int8Ty, Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
@@ -1495,8 +1495,10 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
- auto *SingleImpl = M.getOrInsertFunction(
- Res.SingleImplName, Type::getVoidTy(M.getContext()));
+ Constant *SingleImpl =
+ cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
// This is the import phase so we should not be exporting anything.
bool IsExported = false;
@@ -1538,8 +1540,12 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
}
if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
- auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
- Type::getVoidTy(M.getContext()));
+ // The type of the function is irrelevant, because it's bitcast at calls
+ // anyhow.
+ Constant *JT = cast<Constant>(
+ M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
bool IsExported = false;
applyICallBranchFunnel(SlotInfo, JT, IsExported);
assert(!IsExported);
@@ -1557,23 +1563,20 @@ void DevirtModule::removeRedundantTypeTests() {
}
bool DevirtModule::run() {
+ // If only some of the modules were split, we cannot correctly perform
+ // this transformation. We already checked for the presence of type tests
+ // with partially split modules during the thin link, and would have emitted
+ // an error if any were found, so here we can simply return.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+ return false;
+
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
- // If only some of the modules were split, we cannot correctly handle
- // code that contains type tests or type checked loads.
- if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
- (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
- if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
- (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
- report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
- "or llvm.type.checked.load");
- return false;
- }
-
// Normally if there are no users of the devirtualization intrinsics in the
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6e196bfdbd25..ba15b023f2a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,9 +1,8 @@
//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -823,6 +822,47 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
return nullptr;
}
+/// Wrapping flags may allow combining constants separated by an extend.
+static Instruction *foldNoWrapAdd(BinaryOperator &Add,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
+ Type *Ty = Add.getType();
+ Constant *Op1C;
+ if (!match(Op1, m_Constant(Op1C)))
+ return nullptr;
+
+ // Try this match first because it results in an add in the narrow type.
+ // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
+ Value *X;
+ const APInt *C1, *C2;
+ if (match(Op1, m_APInt(C1)) &&
+ match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
+ C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
+ Constant *NewC =
+ ConstantInt::get(X->getType(), *C2 + C1->trunc(C2->getBitWidth()));
+ return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
+ }
+
+ // More general combining of constants in the wide type.
+ // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
+ Constant *NarrowC;
+ if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+ Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
+ Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideX = Builder.CreateSExt(X, Ty);
+ return BinaryOperator::CreateAdd(WideX, NewC);
+ }
+ // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
+ if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+ Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
+ Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideX = Builder.CreateZExt(X, Ty);
+ return BinaryOperator::CreateAdd(WideX, NewC);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
Constant *Op1C;
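The new foldNoWrapAdd helper relies on the identity (zext (X +nuw C2)) + C1 == zext (X + (C2 + trunc(C1))) when C1 is negative and -C1 <= C2. A small standalone check of that arithmetic, in plain C++ rather than LLVM IR, exhaustive over the valid i8 range:

// Illustrative sketch only, not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C2 = 10;                           // narrow nuw-add constant
  const int32_t C1 = -3;                           // wide constant: negative, -C1 <= C2
  const uint8_t NewC = uint8_t(C2 + uint8_t(C1));  // C2 + trunc(C1) == 7
  for (uint32_t X = 0; X + C2 <= 0xFF; ++X) {      // "nuw": the narrow add cannot wrap
    uint32_t Wide = uint32_t(uint8_t(X + C2)) + uint32_t(C1); // (zext (X +nuw C2)) + C1
    uint32_t Narrow = uint8_t(X + NewC);                      // zext (X + NewC)
    assert(Wide == Narrow);
  }
  return 0;
}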
@@ -832,7 +872,14 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
return NV;
- Value *X, *Y;
+ Value *X;
+ Constant *Op00C;
+
+ // add (sub C1, X), C2 --> sub (add C1, C2), X
+ if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);
+
+ Value *Y;
// add (sub X, Y), -1 --> add (not Y), X
if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
@@ -852,6 +899,11 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (!match(Op1, m_APInt(C)))
return nullptr;
+ // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
+ const APInt *C2;
+ if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
+ return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));
+
if (C->isSignMask()) {
// If wrapping is not allowed, then the addition must set the sign bit:
// X + (signmask) --> X | signmask
@@ -866,19 +918,10 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
// Is this add the last step in a convoluted sext?
// add(zext(xor i16 X, -32768), -32768) --> sext X
Type *Ty = Add.getType();
- const APInt *C2;
if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
return CastInst::Create(Instruction::SExt, X, Ty);
- // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
- if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
- C->isNegative() && C->sge(-C2->sext(C->getBitWidth()))) {
- Constant *NewC =
- ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth()));
- return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
- }
-
if (C->isOneValue() && Op0->hasOneUse()) {
// add (sext i1 X), 1 --> zext (not X)
// TODO: The smallest IR representation is (select X, 0, 1), and that would
@@ -1032,6 +1075,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
return BinaryOperator::CreateNot(NotMask, I.getName());
}
+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+ Type *Ty = I.getType();
+ auto getUAddSat = [&]() {
+ return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+ };
+
+ // add (umin X, ~Y), Y --> uaddsat X, Y
+ Value *X, *Y;
+ if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+ m_Deferred(Y))))
+ return CallInst::Create(getUAddSat(), { X, Y });
+
+ // add (umin X, ~C), C --> uaddsat X, C
+ const APInt *C, *NotC;
+ if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+ *C == ~*NotC)
+ return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
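foldToUnsignedSaturatedAdd is based on the identity umin(X, ~Y) + Y == uaddsat(X, Y). A standalone exhaustive check for 8-bit values, with the saturating add modeled by hand in plain C++:

// Illustrative sketch only, not part of the patch.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t uaddsat8(uint8_t X, uint8_t Y) {
  unsigned Sum = unsigned(X) + unsigned(Y);
  return Sum > 0xFF ? 0xFF : uint8_t(Sum);
}

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      uint8_t NotY = uint8_t(~Y);
      // add (umin X, ~Y), Y, with 8-bit wrapping arithmetic.
      uint8_t Lhs = uint8_t(std::min<uint8_t>(uint8_t(X), NotY) + uint8_t(Y));
      assert(Lhs == uaddsat8(uint8_t(X), uint8_t(Y)));
    }
  return 0;
}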
@@ -1051,6 +1116,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *X = foldAddWithConstant(I))
return X;
+ if (Instruction *X = foldNoWrapAdd(I, Builder))
+ return X;
+
// FIXME: This should be moved into the above helper function to allow these
// transforms for general constant or constant splat vectors.
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
@@ -1119,6 +1187,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(RHS, A);
}
+ // Canonicalize sext to zext for better value tracking potential.
+ // add A, sext(B) --> sub A, zext(B)
+ if (match(&I, m_c_Add(m_Value(A), m_OneUse(m_SExt(m_Value(B))))) &&
+ B->getType()->isIntOrIntVectorTy(1))
+ return BinaryOperator::CreateSub(A, Builder.CreateZExt(B, Ty));
+
// A + -B --> A - B
if (match(RHS, m_Neg(m_Value(B))))
return BinaryOperator::CreateSub(LHS, B);
@@ -1128,7 +1202,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (A + 1) + ~B --> A - B
// ~B + (A + 1) --> A - B
- if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))))
+ // (~B + A) + 1 --> A - B
+ // (A + ~B) + 1 --> A - B
+ if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
+ match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
return BinaryOperator::CreateSub(A, B);
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
@@ -1225,6 +1302,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *V = canonicalizeLowbitMask(I, Builder))
return V;
+ if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+ return SatAdd;
+
return Changed ? &I : nullptr;
}
@@ -1500,6 +1580,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+ // Y - ~X --> (X + 1) + Y
+ if (match(Op1, m_OneUse(m_Not(m_Value(X))))) {
+ return BinaryOperator::CreateAdd(
+ Builder.CreateAdd(Op0, ConstantInt::get(I.getType(), 1)), X);
+ }
+
if (Constant *C = dyn_cast<Constant>(Op0)) {
bool IsNegate = match(C, m_ZeroInt());
Value *X;
@@ -1532,8 +1618,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Instruction *R = foldOpIntoPhi(I, PN))
return R;
- // C-(X+C2) --> (C-C2)-X
Constant *C2;
+
+ // C-(C2-X) --> X+(C-C2)
+ if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+ return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
+
+ // C-(X+C2) --> (C-C2)-X
if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
}
@@ -1626,9 +1717,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Builder.CreateNot(Y, Y->getName() + ".not"));
// 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
- if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
- C->isNotMinSignedValue() && !C->isOneValue())
- return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+ // TODO: This could be extended to match arbitrary vector constants.
+ const APInt *DivC;
+ if (match(Op0, m_Zero()) && match(Op1, m_SDiv(m_Value(X), m_APInt(DivC))) &&
+ !DivC->isMinSignedValue() && *DivC != 1) {
+ Constant *NegDivC = ConstantInt::get(I.getType(), -(*DivC));
+ Instruction *BO = BinaryOperator::CreateSDiv(X, NegDivC);
+ BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
+ return BO;
+ }
// 0 - (X << Y) -> (-X << Y) when X is freely negatable.
if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
@@ -1745,6 +1842,49 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// This eliminates floating-point negation in either 'fneg(X)' or
+/// 'fsub(-0.0, X)' form by combining into a constant operand.
+static Instruction *foldFNegIntoConstant(Instruction &I) {
+ Value *X;
+ Constant *C;
+
+ // Fold negation into constant operand. This is limited with one-use because
+ // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
+ // -(X * C) --> X * (-C)
+ // FIXME: It's arguable whether these should be m_OneUse or not. The current
+ // belief is that the FNeg allows for better reassociation opportunities.
+ if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(X / C) --> X / (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(C / X) --> (-C) / X
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
+ return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
+ Value *Op = I.getOperand(0);
+
+ if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
+ SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *X = foldFNegIntoConstant(I))
+ return X;
+
+ Value *X, *Y;
+
+ // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
+ if (I.hasNoSignedZeros() &&
+ match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
+ return BinaryOperator::CreateFSubFMF(Y, X, &I);
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
@@ -1760,21 +1900,12 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
return BinaryOperator::CreateFNegFMF(Op1, &I);
+ if (Instruction *X = foldFNegIntoConstant(I))
+ return X;
+
Value *X, *Y;
Constant *C;
- // Fold negation into constant operand. This is limited with one-use because
- // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
- // -(X * C) --> X * (-C)
- if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
- return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
- // -(X / C) --> X / (-C)
- if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
- return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
- // -(C / X) --> (-C) / X
- if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
- return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
-
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
// Canonicalize to fadd to make analysis easier.
// This can also help codegen because fadd is commutative.
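foldFNegIntoConstant depends on IEEE-754 negation commuting exactly with multiplication and division, which is why -(X * C) can become X * (-C) and similarly for fdiv. A quick standalone spot check under default floating-point semantics; NaNs are excluded because they never compare equal:

// Illustrative sketch only, not part of the patch.
#include <cassert>

int main() {
  const double C = 0.1;            // an arbitrary non-special constant
  const double Xs[] = {0.0, -0.0, 1.5, -3.25, 1e300, -1e-300};
  for (double X : Xs) {
    assert(-(X * C) == X * (-C));  // -(X * C) --> X * (-C)
    assert(-(X / C) == X / (-C));  // -(X / C) --> X / (-C)
    assert(-(C / X) == (-C) / X);  // -(C / X) --> (-C) / X (infinity signs match too)
  }
  return 0;
}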
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 404c2ad7e6e7..2b9859b602f4 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1,9 +1,8 @@
//===- InstCombineAndOrXor.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -968,7 +967,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
// Can it be decomposed into icmp eq (X & Mask), 0 ?
if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
Pred, X, UnsetBitsMask,
- /*LookThruTrunc=*/false) &&
+ /*LookThroughTrunc=*/false) &&
Pred == ICmpInst::ICMP_EQ)
return true;
// Is it icmp eq (X & Mask), 0 already?
@@ -1022,6 +1021,36 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
CxtI.getName() + ".simplified");
}
+/// Reduce a pair of compares that check if a value has exactly 1 bit set.
+static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
+ InstCombiner::BuilderTy &Builder) {
+ // Handle 'and' / 'or' commutation: make the equality check the first operand.
+ if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
+ std::swap(Cmp0, Cmp1);
+ else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Cmp0, Cmp1);
+
+ // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
+ CmpInst::Predicate Pred0, Pred1;
+ Value *X;
+ if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+ match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+ m_SpecificInt(2))) &&
+ Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
+ Value *CtPop = Cmp1->getOperand(0);
+ return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
+ }
+ // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
+ if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+ match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+ m_SpecificInt(1))) &&
+ Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
+ Value *CtPop = Cmp1->getOperand(0);
+ return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
+ }
+ return nullptr;
+}
+
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
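foldIsPowerOf2 treats the two compare pairs as power-of-two tests on the ctpop value. A standalone check of the underlying equivalence, exhaustive over 16-bit values, using C++20 std::popcount as the scalar stand-in for ctpop:

// Illustrative sketch only, not part of the patch.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    bool AndForm = (X != 0) && (std::popcount(X) < 2); // (X != 0) && (ctpop(X) u< 2)
    bool OrForm  = (X == 0) || (std::popcount(X) > 1); // (X == 0) || (ctpop(X) u> 1)
    bool IsPow2  = std::popcount(X) == 1;              // ctpop(X) == 1
    assert(AndForm == IsPow2);
    assert(OrForm == !IsPow2);
  }
  return 0;
}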
@@ -1064,6 +1093,9 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
return V;
+ if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1259,6 +1291,52 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd)
return nullptr;
}
+/// This is a limited reassociation for a special case (see above) where we are
+/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
+/// This could be handled more generally in '-reassociation', but it seems like
+/// an unlikely pattern for a large number of logic ops and fcmps.
+static Instruction *reassociateFCmps(BinaryOperator &BO,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction::BinaryOps Opcode = BO.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Expecting and/or op for fcmp transform");
+
+ // There are 4 commuted variants of the pattern. Canonicalize operands of this
+ // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
+ Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
+ FCmpInst::Predicate Pred;
+ if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP())))
+ std::swap(Op0, Op1);
+
+ // Match inner binop and the predicate for combining 2 NAN checks into 1.
+ BinaryOperator *BO1;
+ FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
+ : FCmpInst::FCMP_UNO;
+ if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
+ !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+ return nullptr;
+
+ // The inner logic op must have a matching fcmp operand.
+ Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ std::swap(BO10, BO11);
+
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ return nullptr;
+
+ // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
+ // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
+ Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y);
+ if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) {
+ // Intersect FMF from the 2 source fcmps.
+ NewFCmpInst->copyIRFlags(Op0);
+ NewFCmpInst->andIRFlags(BO10);
+ }
+ return BinaryOperator::Create(Opcode, NewFCmp, BO11);
+}
+
/// Match De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
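reassociateFCmps uses the fact that "fcmp ord X, 0.0" is simply a not-NaN test, so two such tests joined by 'and' collapse into one ordered compare of X and Y (dually for 'or' and unordered). A standalone check of that equivalence, with std::isnan modeling the NaN test:

// Illustrative sketch only, not part of the patch.
#include <cassert>
#include <cmath>
#include <limits>

static bool ord(double A, double B) { return !std::isnan(A) && !std::isnan(B); }
static bool uno(double A, double B) { return std::isnan(A) || std::isnan(B); }

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double Vals[] = {0.0, 1.5, -2.0, NaN};
  for (double X : Vals)
    for (double Y : Vals) {
      // and (fcmp ord X, 0), (fcmp ord Y, 0) --> fcmp ord X, Y
      assert((ord(X, 0.0) && ord(Y, 0.0)) == ord(X, Y));
      // or (fcmp uno X, 0), (fcmp uno Y, 0) --> fcmp uno X, Y
      assert((uno(X, 0.0) || uno(Y, 0.0)) == uno(X, Y));
    }
  return 0;
}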
@@ -1619,6 +1697,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
// of X and OP behaves well when given trunc(C1) and X.
+ // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
switch (Op0I->getOpcode()) {
default:
break;
@@ -1629,7 +1708,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
case Instruction::Sub:
Value *X;
ConstantInt *C1;
- if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
+ // TODO: The one use restrictions could be relaxed a little if the AND
+ // is going to be removed.
+ if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
+ m_ConstantInt(C1))))) {
if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
Value *BinOp;
@@ -1747,6 +1829,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;
@@ -1820,14 +1905,18 @@ static Instruction *matchRotate(Instruction &Or) {
// First, find an or'd pair of opposite shifts with the same shifted operand:
// or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
- Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+ BinaryOperator *Or0, *Or1;
+ if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
+ !match(Or.getOperand(1), m_BinOp(Or1)))
+ return nullptr;
+
Value *ShVal, *ShAmt0, *ShAmt1;
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
!match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
return nullptr;
- auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
- auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
+ BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;
@@ -1842,6 +1931,13 @@ static Instruction *matchRotate(Instruction &Or) {
match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
return X;
+ // Similar to above, but the shift amount may be extended after masking,
+ // so return the extended value as the parameter for the intrinsic.
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+ m_SpecificInt(Mask))))
+ return L;
+
return nullptr;
};
@@ -2083,6 +2179,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
return V;
+ if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSC || !RHSC)
return nullptr;
@@ -2412,6 +2511,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
return CastedOr;
diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
new file mode 100644
index 000000000000..5f37a00f56cf
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -0,0 +1,159 @@
+//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for atomic rmw instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "InstCombineInternal.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace {
+/// Return true if and only if the given instruction does not modify the memory
+/// location referenced. Note that an idempotent atomicrmw may still have
+/// ordering effects on nearby instructions, or be volatile.
+/// TODO: Common w/ the version in AtomicExpandPass, and change the term used.
+/// Idempotent is confusing in this context.
+bool isIdempotentRMW(AtomicRMWInst& RMWI) {
+ if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::FAdd: // -0.0
+ return CF->isZero() && CF->isNegative();
+ case AtomicRMWInst::FSub: // +0.0
+ return CF->isZero() && !CF->isNegative();
+ default:
+ return false;
+ };
+
+ auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+ if(!C)
+ return false;
+
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ case AtomicRMWInst::Min:
+ return C->isMaxValue(true);
+ case AtomicRMWInst::Max:
+ return C->isMinValue(true);
+ case AtomicRMWInst::UMin:
+ return C->isMaxValue(false);
+ case AtomicRMWInst::UMax:
+ return C->isMinValue(false);
+ default:
+ return false;
+ }
+}
+
+/// Return true if the given instruction always produces a value in memory
+/// equivalent to its value operand.
+bool isSaturating(AtomicRMWInst& RMWI) {
+ if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ return CF->isNaN();
+ default:
+ return false;
+ };
+
+ auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+ if(!C)
+ return false;
+
+ switch(RMWI.getOperation()) {
+ default:
+ return false;
+ case AtomicRMWInst::Xchg:
+ return true;
+ case AtomicRMWInst::Or:
+ return C->isAllOnesValue();
+ case AtomicRMWInst::And:
+ return C->isZero();
+ case AtomicRMWInst::Min:
+ return C->isMinValue(true);
+ case AtomicRMWInst::Max:
+ return C->isMaxValue(true);
+ case AtomicRMWInst::UMin:
+ return C->isMinValue(false);
+ case AtomicRMWInst::UMax:
+ return C->isMaxValue(false);
+ };
+}
+}
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+
+ // Volatile RMWs perform a load and a store, we cannot replace this by just a
+ // load or just a store. We chose not to canonicalize out of general paranoia
+ // about user expectations around volatile.
+ if (RMWI.isVolatile())
+ return nullptr;
+
+ // Any atomicrmw op which produces a known result in memory can be
+ // replaced w/an atomicrmw xchg.
+ if (isSaturating(RMWI) &&
+ RMWI.getOperation() != AtomicRMWInst::Xchg) {
+ RMWI.setOperation(AtomicRMWInst::Xchg);
+ return &RMWI;
+ }
+
+ AtomicOrdering Ordering = RMWI.getOrdering();
+ assert(Ordering != AtomicOrdering::NotAtomic &&
+ Ordering != AtomicOrdering::Unordered &&
+ "AtomicRMWs don't make sense with Unordered or NotAtomic");
+
+ // Any atomicrmw xchg with no uses can be converted to an atomic store if the
+ // ordering is compatible.
+ if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
+ RMWI.use_empty()) {
+ if (Ordering != AtomicOrdering::Release &&
+ Ordering != AtomicOrdering::Monotonic)
+ return nullptr;
+ auto *SI = new StoreInst(RMWI.getValOperand(),
+ RMWI.getPointerOperand(), &RMWI);
+ SI->setAtomic(Ordering, RMWI.getSyncScopeID());
+ SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ return eraseInstFromFunction(RMWI);
+ }
+
+ if (!isIdempotentRMW(RMWI))
+ return nullptr;
+
+ // We chose to canonicalize all idempotent operations to a single
+ // operation code and constant. This makes it easier for the rest of the
+ // optimizer to match. The choices of or w/0 and fadd w/-0.0 are
+ // arbitrary.
+ if (RMWI.getType()->isIntegerTy() &&
+ RMWI.getOperation() != AtomicRMWInst::Or) {
+ RMWI.setOperation(AtomicRMWInst::Or);
+ RMWI.setOperand(1, ConstantInt::get(RMWI.getType(), 0));
+ return &RMWI;
+ } else if (RMWI.getType()->isFloatingPointTy() &&
+ RMWI.getOperation() != AtomicRMWInst::FAdd) {
+ RMWI.setOperation(AtomicRMWInst::FAdd);
+ RMWI.setOperand(1, ConstantFP::getNegativeZero(RMWI.getType()));
+ return &RMWI;
+ }
+
+ // Check if the required ordering is compatible with an atomic load.
+ if (Ordering != AtomicOrdering::Acquire &&
+ Ordering != AtomicOrdering::Monotonic)
+ return nullptr;
+
+ LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+ Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+ Load->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ return Load;
+}
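The isIdempotentRMW cases reduce to simple algebraic identities: each operation with the listed constant leaves memory unchanged, which is why the atomicrmw can later be rewritten as a load. A standalone check of the integer identities in plain (non-atomic) C++:

// Illustrative sketch only, not part of the patch.
#include <algorithm>
#include <cassert>
#include <climits>

int main() {
  const int Xs[] = {0, 1, -1, 42, INT_MIN, INT_MAX};
  for (int X : Xs) {
    assert((X + 0) == X);                                    // add 0
    assert((X - 0) == X);                                    // sub 0
    assert((X | 0) == X);                                    // or 0
    assert((X ^ 0) == X);                                    // xor 0
    assert((X & -1) == X);                                   // and -1 (all ones)
    assert(std::min(X, INT_MAX) == X);                       // min with INT_MAX
    assert(std::max(X, INT_MIN) == X);                       // max with INT_MIN
    assert(std::min(unsigned(X), UINT_MAX) == unsigned(X));  // umin with UINT_MAX
    assert(std::max(unsigned(X), 0u) == unsigned(X));        // umax with 0
  }
  return 0;
}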
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index aeb25d530d71..4b3333affa72 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1,19 +1,19 @@
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file implements the visitCall and visitInvoke functions.
+// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -23,12 +23,12 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -58,6 +58,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
@@ -121,6 +122,15 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return MI;
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(MI->getDest())) {
+ // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -173,7 +183,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
- LoadInst *L = Builder.CreateLoad(Src);
+ LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(CopySrcAlign);
if (CopyMD)
@@ -219,6 +229,15 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return MI;
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(MI->getDest())) {
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -523,7 +542,8 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
-static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
+static Value *simplifyX86pack(IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();
@@ -534,167 +554,58 @@ static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
Type *ArgTy = Arg0->getType();
unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
- unsigned NumDstElts = ResTy->getVectorNumElements();
unsigned NumSrcElts = ArgTy->getVectorNumElements();
- assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
+ assert(ResTy->getVectorNumElements() == (2 * NumSrcElts) &&
+ "Unexpected packing types");
- unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
- assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
+ unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
+ assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
"Unexpected packing types");
// Constant folding.
- auto *Cst0 = dyn_cast<Constant>(Arg0);
- auto *Cst1 = dyn_cast<Constant>(Arg1);
- if (!Cst0 || !Cst1)
+ if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
return nullptr;
- SmallVector<Constant *, 32> Vals;
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
- unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
- auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
- auto *COp = Cst->getAggregateElement(SrcIdx);
- if (COp && isa<UndefValue>(COp)) {
- Vals.push_back(UndefValue::get(ResTy->getScalarType()));
- continue;
- }
-
- auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
- if (!CInt)
- return nullptr;
-
- APInt Val = CInt->getValue();
- assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
- "Unexpected constant bitwidth");
-
- if (IsSigned) {
- // PACKSS: Truncate signed value with signed saturation.
- // Source values less than dst minint are saturated to minint.
- // Source values greater than dst maxint are saturated to maxint.
- if (Val.isSignedIntN(DstScalarSizeInBits))
- Val = Val.trunc(DstScalarSizeInBits);
- else if (Val.isNegative())
- Val = APInt::getSignedMinValue(DstScalarSizeInBits);
- else
- Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
- } else {
- // PACKUS: Truncate signed value with unsigned saturation.
- // Source values less than zero are saturated to zero.
- // Source values greater than dst maxuint are saturated to maxuint.
- if (Val.isIntN(DstScalarSizeInBits))
- Val = Val.trunc(DstScalarSizeInBits);
- else if (Val.isNegative())
- Val = APInt::getNullValue(DstScalarSizeInBits);
- else
- Val = APInt::getAllOnesValue(DstScalarSizeInBits);
- }
-
- Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
- }
- }
-
- return ConstantVector::get(Vals);
-}
-
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- ConstantInt *Arg = nullptr;
- Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
- if (!Arg)
- return nullptr;
- unsigned RoundControl = Arg->getZExtValue();
-
- Arg = nullptr;
- unsigned SAE = 0;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
- else
- SAE = 4;
- if (!SAE) {
- if (!Arg)
- return nullptr;
- SAE = Arg->getZExtValue();
+ // Clamp Values - signed/unsigned both use signed clamp values, but they
+ // differ on the min/max values.
+ APInt MinValue, MaxValue;
+ if (IsSigned) {
+ // PACKSS: Truncate signed value with signed saturation.
+ // Source values less than dst minint are saturated to minint.
+ // Source values greater than dst maxint are saturated to maxint.
+ MinValue =
+ APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ MaxValue =
+ APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ } else {
+ // PACKUS: Truncate signed value with unsigned saturation.
+ // Source values less than zero are saturated to zero.
+ // Source values greater than dst maxuint are saturated to maxuint.
+ MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+ MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
}
- if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
- return nullptr;
+ auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
+ auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
- Value *Src, *Dst, *Mask;
- bool IsScalar = false;
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- IsScalar = true;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Mask = II.getArgOperand(3);
- Value *Zero = Constant::getNullValue(Mask->getType());
- Mask = Builder.CreateAnd(Mask, 1);
- Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
- Dst = II.getArgOperand(2);
- } else
- Dst = II.getArgOperand(0);
- Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
- } else {
- Src = II.getArgOperand(0);
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
- Dst = II.getArgOperand(2);
- Mask = II.getArgOperand(3);
- } else {
- Dst = Src;
- Mask = ConstantInt::getAllOnesValue(
- Builder.getIntNTy(Src->getType()->getVectorNumElements()));
- }
+ // Shuffle clamped args together at the lane level.
+ SmallVector<unsigned, 32> PackMask;
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
}
+ auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
- Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
- if (!IsScalar) {
- if (auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Res;
- auto *MaskTy = VectorType::get(
- Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder.CreateBitCast(Mask, MaskTy);
- unsigned Width = Src->getType()->getVectorNumElements();
- if (MaskTy->getVectorNumElements() > Width) {
- uint32_t Indices[4];
- for (unsigned i = 0; i != Width; ++i)
- Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, Width));
- }
- return Builder.CreateSelect(Mask, Res, Dst);
- }
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
- Res = Builder.CreateSelect(Mask, Res, Dst);
- Dst = II.getArgOperand(0);
- }
- return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
+ // Truncate to dst size.
+ return Builder.CreateTrunc(Shuffle, ResTy);
}
static Value *simplifyX86movmsk(const IntrinsicInst &II,
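The rewritten simplifyX86pack emits a clamp (two selects) followed by a shuffle and a truncate. Per element, the signed-saturating case is just clamp-then-truncate; here is a standalone scalar model for i16 -> i8, a sketch of one lane rather than the vector code itself:

// Illustrative sketch only, not part of the patch.
#include <cassert>
#include <cstdint>

static int8_t packss16to8(int16_t V) {
  // Clamp to the destination range first (the two selects above)...
  int16_t Clamped = V < INT8_MIN ? INT8_MIN : (V > INT8_MAX ? INT8_MAX : V);
  // ...then plain truncation is safe (the CreateTrunc above).
  return static_cast<int8_t>(Clamped);
}

int main() {
  assert(packss16to8(1000) == 127);
  assert(packss16to8(-1000) == -128);
  assert(packss16to8(42) == 42);
  assert(packss16to8(-5) == -5);
  return 0;
}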
@@ -711,43 +622,44 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
if (!ArgTy->isVectorTy())
return nullptr;
- if (auto *C = dyn_cast<Constant>(Arg)) {
- // Extract signbits of the vector input and pack into integer result.
- APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
- for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
- auto *COp = C->getAggregateElement(I);
- if (!COp)
- return nullptr;
- if (isa<UndefValue>(COp))
- continue;
-
- auto *CInt = dyn_cast<ConstantInt>(COp);
- auto *CFp = dyn_cast<ConstantFP>(COp);
- if (!CInt && !CFp)
- return nullptr;
-
- if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
- Result.setBit(I);
- }
- return Constant::getIntegerValue(ResTy, Result);
- }
+ // Expand MOVMSK to compare/bitcast/zext:
+ // e.g. PMOVMSKB(v16i8 x):
+ // %cmp = icmp slt <16 x i8> %x, zeroinitializer
+ // %int = bitcast <16 x i1> %cmp to i16
+ // %res = zext i16 %int to i32
+ unsigned NumElts = ArgTy->getVectorNumElements();
+ Type *IntegerVecTy = VectorType::getInteger(cast<VectorType>(ArgTy));
+ Type *IntegerTy = Builder.getIntNTy(NumElts);
+
+ Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
+ Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
+ Res = Builder.CreateBitCast(Res, IntegerTy);
+ Res = Builder.CreateZExtOrTrunc(Res, ResTy);
+ return Res;
+}
- // Look for a sign-extended boolean source vector as the argument to this
- // movmsk. If the argument is bitcast, look through that, but make sure the
- // source of that bitcast is still a vector with the same number of elements.
- // TODO: We can also convert a bitcast with wider elements, but that requires
- // duplicating the bool source sign bits to match the number of elements
- // expected by the movmsk call.
- Arg = peekThroughBitcast(Arg);
- Value *X;
- if (Arg->getType()->isVectorTy() &&
- Arg->getType()->getVectorNumElements() == ArgTy->getVectorNumElements() &&
- match(Arg, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
- // call iM movmsk(sext <N x i1> X) --> zext (bitcast <N x i1> X to iN) to iM
- unsigned NumElts = X->getType()->getVectorNumElements();
- Type *ScalarTy = Type::getIntNTy(Arg->getContext(), NumElts);
- Value *BC = Builder.CreateBitCast(X, ScalarTy);
- return Builder.CreateZExtOrTrunc(BC, ResTy);
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *CarryIn = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Op2 = II.getArgOperand(2);
+ Type *RetTy = II.getType();
+ Type *OpTy = Op1->getType();
+ assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+ RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+ "Unexpected types for x86 addcarry");
+
+ // If carry-in is zero, this is just an unsigned add with overflow.
+ if (match(CarryIn, m_ZeroInt())) {
+ Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+ { Op1, Op2 });
+ // The types have to be adjusted to match the x86 call types.
+ Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+ Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+ Builder.getInt8Ty());
+ Value *Res = UndefValue::get(RetTy);
+ Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+ return Builder.CreateInsertValue(Res, UAddResult, 1);
}
return nullptr;
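simplifyX86addcarry exploits the fact that with a zero carry-in, addcarry is an ordinary unsigned add whose carry-out equals the overflow bit, which is exactly what uadd.with.overflow computes. A standalone scalar model; addcarry64 is an illustrative helper written for this sketch, not an LLVM or x86 API:

// Illustrative sketch only, not part of the patch.
#include <cassert>
#include <cstdint>

static uint64_t addcarry64(uint8_t CarryIn, uint64_t A, uint64_t B,
                           uint8_t &CarryOut) {
  uint64_t Sum = A + B;
  uint8_t C1 = Sum < A;                 // carry out of A + B
  uint64_t Result = Sum + CarryIn;
  CarryOut = C1 | (Result < Sum);       // carry out of adding CarryIn
  return Result;
}

int main() {
  uint8_t Carry;
  // Zero carry-in: identical to an unsigned add; carry-out == overflow flag.
  assert(addcarry64(0, ~0ULL, 1, Carry) == 0 && Carry == 1);
  assert(addcarry64(0, 40, 2, Carry) == 42 && Carry == 0);
  return 0;
}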
@@ -892,7 +804,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
Value *Args[] = {Op0, CILength, CIIndex};
Module *M = II.getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
return Builder.CreateCall(F, Args);
}
}
@@ -993,7 +905,7 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
Value *Args[] = {Op0, Op1, CILength, CIIndex};
Module *M = II.getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
return Builder.CreateCall(F, Args);
}
@@ -1134,82 +1046,42 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}
-/// Decode XOP integer vector comparison intrinsics.
-static Value *simplifyX86vpcom(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder,
- bool IsSigned) {
- if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
- uint64_t Imm = CInt->getZExtValue() & 0x7;
- VectorType *VecTy = cast<VectorType>(II.getType());
- CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-
- switch (Imm) {
- case 0x0:
- Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- break;
- case 0x1:
- Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
- break;
- case 0x2:
- Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- break;
- case 0x3:
- Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- break;
- case 0x4:
- Pred = ICmpInst::ICMP_EQ; break;
- case 0x5:
- Pred = ICmpInst::ICMP_NE; break;
- case 0x6:
- return ConstantInt::getSigned(VecTy, 0); // FALSE
- case 0x7:
- return ConstantInt::getSigned(VecTy, -1); // TRUE
- }
-
- if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
- II.getArgOperand(1)))
- return Builder.CreateSExtOrTrunc(Cmp, VecTy);
- }
- return nullptr;
-}
+// TODO, Obvious Missing Transforms:
+// * Narrow width by halves excluding zero/undef lanes
+Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
-static bool maskIsAllOneOrUndef(Value *Mask) {
- auto *ConstMask = dyn_cast<Constant>(Mask);
- if (!ConstMask)
- return false;
- if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
- return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
- if (auto *MaskElt = ConstMask->getAggregateElement(I))
- if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
- continue;
- return false;
- }
- return true;
-}
-
-static Value *simplifyMaskedLoad(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
- Value *LoadPtr = II.getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
- return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
+ if (maskIsAllOneOrUndef(II.getArgOperand(2)))
+ return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+
+ // If we can unconditionally load from this address, replace with a
+ // load/select idiom. TODO: use DT for context sensitive query
+ if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
+ II.getModule()->getDataLayout(),
+ &II, nullptr)) {
+ Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
}
-static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Narrow width by halves excluding zero/undef lanes
+Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
if (ConstMask->isNullValue())
- return IC.eraseInstFromFunction(II);
+ return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
if (ConstMask->isAllOnesValue()) {
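The masked-load rewrite above turns a masked load of a known-dereferenceable address into an unconditional load plus a select against the passthru operand. A one-lane scalar model of why that is equivalent; the rewrite is only valid because the pointer is known safe to read unconditionally:

// Illustrative sketch only, not part of the patch.
#include <cassert>

static int maskedLoadLane(const int *Ptr, bool Mask, int Passthru) {
  // Original semantics: only read *Ptr when the lane is active.
  return Mask ? *Ptr : Passthru;
}

static int loadThenSelect(const int *Ptr, bool Mask, int Passthru) {
  // Rewritten form: safe only because Ptr is known dereferenceable.
  int Loaded = *Ptr;
  return Mask ? Loaded : Passthru;
}

int main() {
  int V = 7;
  assert(maskedLoadLane(&V, true, -1) == loadThenSelect(&V, true, -1));
  assert(maskedLoadLane(&V, false, -1) == loadThenSelect(&V, false, -1));
  return 0;
}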
@@ -1218,14 +1090,57 @@ static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
}
+ // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+ APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+ APInt UndefElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+ DemandedElts, UndefElts)) {
+ II.setOperand(0, V);
+ return &II;
+ }
+
return nullptr;
}
-static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
- // If the mask is all zeros, return the "passthru" argument of the gather.
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
- if (ConstMask && ConstMask->isNullValue())
- return IC.replaceInstUsesWith(II, II.getArgOperand(3));
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane load -> load
+// * Dereferenceable address & few lanes -> scalarize speculative load/selects
+// * Adjacent vector addresses -> masked.load
+// * Narrow width by halves excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar load
+// * Vector incrementing address -> vector masked load
+Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
+ return nullptr;
+}
+
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Adjacent vector addresses -> masked.store
+// * Narrow store width by halves excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar store
+// * Vector incrementing address -> vector masked store
+Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ if (!ConstMask)
+ return nullptr;
+
+ // If the mask is all zeros, a scatter does nothing.
+ if (ConstMask->isNullValue())
+ return eraseInstFromFunction(II);
+
+ // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+ APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+ APInt UndefElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+ DemandedElts, UndefElts)) {
+ II.setOperand(0, V);
+ return &II;
+ }
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
+ DemandedElts, UndefElts)) {
+ II.setOperand(1, V);
+ return &II;
+ }
return nullptr;
}
@@ -1264,25 +1179,41 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
return cast<Instruction>(Result);
}
-static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
- // If the mask is all zeros, a scatter does nothing.
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
- if (ConstMask && ConstMask->isNullValue())
- return IC.eraseInstFromFunction(II);
-
- return nullptr;
-}
-
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
assert((II.getIntrinsicID() == Intrinsic::cttz ||
II.getIntrinsicID() == Intrinsic::ctlz) &&
"Expected cttz or ctlz intrinsic");
+ bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
Value *Op0 = II.getArgOperand(0);
+ Value *X;
+ // ctlz(bitreverse(x)) -> cttz(x)
+ // cttz(bitreverse(x)) -> ctlz(x)
+ if (match(Op0, m_BitReverse(m_Value(X)))) {
+ Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
+ Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
+ return CallInst::Create(F, {X, II.getArgOperand(1)});
+ }
+
+ if (IsTZ) {
+ // cttz(-x) -> cttz(x)
+ if (match(Op0, m_Neg(m_Value(X)))) {
+ II.setOperand(0, X);
+ return &II;
+ }
+
+ // cttz(abs(x)) -> cttz(x)
+ // cttz(nabs(x)) -> cttz(x)
+ Value *Y;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
+ if (SPF == SPF_ABS || SPF == SPF_NABS) {
+ II.setOperand(0, X);
+ return &II;
+ }
+ }
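For illustration (i32 is an arbitrary choice), the new cttz folds amount to the rewrite below; negation, abs, and nabs preserve the number of trailing zeros, and the bitreverse case above similarly swaps cttz/ctlz:

    %n = sub i32 0, %x
    %z = call i32 @llvm.cttz.i32(i32 %n, i1 false)
      -->
    %z = call i32 @llvm.cttz.i32(i32 %x, i1 false)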
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
- bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
: Known.countMaxLeadingZeros();
unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
@@ -1328,6 +1259,14 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
Value *Op0 = II.getArgOperand(0);
+ Value *X;
+ // ctpop(bitreverse(x)) -> ctpop(x)
+ // ctpop(bswap(x)) -> ctpop(x)
+ if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) {
+ II.setOperand(0, X);
+ return &II;
+ }
+
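A quick sketch of the fold above (i32 is illustrative): byte-swapping or bit-reversing a value does not change its population count, so

    %b = call i32 @llvm.bswap.i32(i32 %x)
    %c = call i32 @llvm.ctpop.i32(i32 %b)
      -->
    %c = call i32 @llvm.ctpop.i32(i32 %x)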
// FIXME: Try to simplify vectors of integers.
auto *IT = dyn_cast<IntegerType>(Op0->getType());
if (!IT)
@@ -1513,7 +1452,7 @@ static Value *simplifyNeonVld1(const IntrinsicInst &II,
auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
PointerType::get(II.getType(), 0));
- return Builder.CreateAlignedLoad(BCastInst, Alignment);
+ return Builder.CreateAlignedLoad(II.getType(), BCastInst, Alignment);
}
// Returns true iff the 2 intrinsics have the same operands, limiting the
@@ -1827,8 +1766,18 @@ static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
return nullptr;
}
+Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+ WithOverflowInst *WO = cast<WithOverflowInst>(II);
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
+ WO->getRHS(), *WO, OperationResult, OverflowResult))
+ return CreateOverflowTuple(WO, OperationResult, OverflowResult);
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
-/// instructions. For normal calls, it allows visitCallSite to do the heavy
+/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
@@ -1845,10 +1794,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
- if (!II) return visitCallSite(&CI);
+ if (!II) return visitCallBase(CI);
- // Intrinsics cannot occur in an invoke, so handle them here instead of in
- // visitCallSite.
+ // Intrinsics cannot occur in an invoke or a callbr, so handle them here
+ // instead of in visitCallBase.
if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
bool Changed = false;
@@ -1908,6 +1857,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
+ // For vector result intrinsics, use the generic demanded vector support.
+ if (II->getType()->isVectorTy()) {
+ auto VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ }
+
if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
return I;
@@ -1918,12 +1879,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
};
- switch (II->getIntrinsicID()) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
default: break;
case Intrinsic::objectsize:
- if (ConstantInt *N =
- lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
- return replaceInstUsesWith(CI, N);
+ if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+ return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
@@ -1940,15 +1901,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::masked_load:
- if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
+ if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
case Intrinsic::masked_store:
- return simplifyMaskedStore(*II, *this);
+ return simplifyMaskedStore(*II);
case Intrinsic::masked_gather:
- return simplifyMaskedGather(*II, *this);
+ return simplifyMaskedGather(*II);
case Intrinsic::masked_scatter:
- return simplifyMaskedScatter(*II, *this);
+ return simplifyMaskedScatter(*II);
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
@@ -1982,33 +1943,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::fshl:
case Intrinsic::fshr: {
- const APInt *SA;
- if (match(II->getArgOperand(2), m_APInt(SA))) {
- Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
- unsigned BitWidth = SA->getBitWidth();
- uint64_t ShiftAmt = SA->urem(BitWidth);
- assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
- // Normalize to funnel shift left.
- if (II->getIntrinsicID() == Intrinsic::fshr)
- ShiftAmt = BitWidth - ShiftAmt;
-
- // fshl(X, 0, C) -> shl X, C
- // fshl(X, undef, C) -> shl X, C
- if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
- return BinaryOperator::CreateShl(
- Op0, ConstantInt::get(II->getType(), ShiftAmt));
-
- // fshl(0, X, C) -> lshr X, (BW-C)
- // fshl(undef, X, C) -> lshr X, (BW-C)
- if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
- return BinaryOperator::CreateLShr(
- Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ Type *Ty = II->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ Constant *ShAmtC;
+ if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
+ !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
+ // Canonicalize a shift amount constant operand to modulo the bit-width.
+ Constant *WidthC = ConstantInt::get(Ty, BitWidth);
+ Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+ if (ModuloC != ShAmtC) {
+ II->setArgOperand(2, ModuloC);
+ return II;
+ }
+ assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
+ ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
+ "Shift amount expected to be modulo bitwidth");
+
+ // Canonicalize funnel shift right by constant to funnel shift left. This
+ // is not entirely arbitrary. For historical reasons, the backend may
+ // recognize rotate left patterns but miss rotate right patterns.
+ if (IID == Intrinsic::fshr) {
+ // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
+ Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
+ Module *Mod = II->getModule();
+ Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
+ return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
+ }
+ assert(IID == Intrinsic::fshl &&
+ "All funnel shifts by simple constants should go left");
+
+ // fshl(X, 0, C) --> shl X, C
+ // fshl(X, undef, C) --> shl X, C
+ if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
+ return BinaryOperator::CreateShl(Op0, ShAmtC);
+
+ // fshl(0, X, C) --> lshr X, (BW-C)
+ // fshl(undef, X, C) --> lshr X, (BW-C)
+ if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
+ return BinaryOperator::CreateLShr(Op1,
+ ConstantExpr::getSub(WidthC, ShAmtC));
+
+ // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
+ if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
+ Module *Mod = II->getModule();
+ Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
+ return CallInst::Create(Bswap, { Op0 });
+ }
}
+ // Left or right might be masked.
+ if (SimplifyDemandedInstructionBits(*II))
+ return &CI;
+
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
- unsigned BitWidth = II->getType()->getScalarSizeInBits();
if (!isPowerOf2_32(BitWidth))
break;
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
@@ -2018,7 +2008,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
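To make the constant-shift canonicalizations above concrete (the widths and shift amounts are illustrative):

    %r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 5)
      -->
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 27)   ; 32 - 5

    %s = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 3)
      -->
    %s = shl i32 %x, 3

    %t = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
      -->
    %t = call i16 @llvm.bswap.i16(i16 %x)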
case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
+ case Intrinsic::sadd_with_overflow: {
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
+
+ // Given 2 constant operands whose sum does not overflow:
+ // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
+ // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+ Value *X;
+ const APInt *C0, *C1;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ bool IsSigned = IID == Intrinsic::sadd_with_overflow;
+ bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
+ : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
+ if (HasNWAdd && match(Arg1, m_APInt(C1))) {
+ bool Overflow;
+ APInt NewC =
+ IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
+ if (!Overflow)
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantInt::get(Arg1->getType(), NewC)));
+ }
+ break;
+ }
+
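A sketch of the new constant-merging fold (i32 and the constants are illustrative); it is only valid because the inner add carries nuw/nsw and the two constants sum without overflowing:

    %a = add nuw i32 %x, 10
    %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 20)
      -->
    %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 30)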
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
@@ -2026,16 +2043,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
case Intrinsic::usub_with_overflow:
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
+ break;
+
case Intrinsic::ssub_with_overflow: {
- OverflowCheckFlavor OCF =
- IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
- assert(OCF != OCF_INVALID && "unexpected!");
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
- Value *OperationResult = nullptr;
- Constant *OverflowResult = nullptr;
- if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
- *II, OperationResult, OverflowResult))
- return CreateOverflowTuple(II, OperationResult, OverflowResult);
+ Constant *C;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ // Given a constant C that is not the minimum signed value
+ // for an integer of a given bit width:
+ //
+ // ssubo X, C -> saddo X, -C
+ if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
+ Value *NegVal = ConstantExpr::getNeg(C);
+ // Build a saddo call that is equivalent to the discovered
+ // ssubo call.
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
+ Arg0, NegVal));
+ }
break;
}
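Concretely (i32 and the constant are illustrative), the ssubo fold above is:

    %r = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 7)
      -->
    %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 -7)

The C != MIN guard matters because negating the minimum signed value would wrap back to itself.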
@@ -2047,39 +2077,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat: {
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- Intrinsic::ID IID = II->getIntrinsicID();
+ SaturatingInst *SI = cast<SaturatingInst>(II);
+ Type *Ty = SI->getType();
+ Value *Arg0 = SI->getLHS();
+ Value *Arg1 = SI->getRHS();
// Make use of known overflow information.
- OverflowResult OR;
- switch (IID) {
- default:
- llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::uadd_sat:
- OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
- if (OR == OverflowResult::NeverOverflows)
- return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
- if (OR == OverflowResult::AlwaysOverflows)
- return replaceInstUsesWith(*II,
- ConstantInt::getAllOnesValue(II->getType()));
- break;
- case Intrinsic::usub_sat:
- OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
- if (OR == OverflowResult::NeverOverflows)
- return BinaryOperator::CreateNUWSub(Arg0, Arg1);
- if (OR == OverflowResult::AlwaysOverflows)
- return replaceInstUsesWith(*II,
- ConstantInt::getNullValue(II->getType()));
- break;
- case Intrinsic::sadd_sat:
- if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
- return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
- break;
- case Intrinsic::ssub_sat:
- if (willNotOverflowSignedSub(Arg0, Arg1, *II))
- return BinaryOperator::CreateNSWSub(Arg0, Arg1);
- break;
+ OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
+ Arg0, Arg1, SI);
+ switch (OR) {
+ case OverflowResult::MayOverflow:
+ break;
+ case OverflowResult::NeverOverflows:
+ if (SI->isSigned())
+ return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
+ else
+ return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
+ case OverflowResult::AlwaysOverflowsLow: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
+ }
+ case OverflowResult::AlwaysOverflowsHigh: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
+ }
}
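For the NeverOverflows path above, a representative rewrite (i32 is illustrative) is:

    ; when analysis proves the unsigned add cannot overflow
    %r = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
      -->
    %r = add nuw i32 %x, %y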
// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
@@ -2101,7 +2124,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt NewVal;
bool IsUnsigned =
IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
- if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+ if (Other->getIntrinsicID() == IID &&
match(Arg1, m_APInt(Val)) &&
match(Other->getArgOperand(0), m_Value(X)) &&
match(Other->getArgOperand(1), m_APInt(Val2))) {
@@ -2136,7 +2159,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
- Intrinsic::ID IID = II->getIntrinsicID();
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
@@ -2266,8 +2288,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
- Value *NarrowII =
- Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
+ Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
@@ -2302,7 +2323,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 16) {
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr);
+ return new LoadInst(II->getType(), Ptr);
}
break;
case Intrinsic::ppc_vsx_lxvw4x:
@@ -2310,7 +2331,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC VSX loads into normal loads.
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr, Twine(""), false, 1);
+ return new LoadInst(II->getType(), Ptr, Twine(""), false, 1);
}
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
@@ -2338,7 +2359,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->getType()->getVectorNumElements());
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(VTy));
- Value *Load = Builder.CreateLoad(Ptr);
+ Value *Load = Builder.CreateLoad(VTy, Ptr);
return new FPExtInst(Load, II->getType());
}
break;
@@ -2348,7 +2369,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 32) {
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr);
+ return new LoadInst(II->getType(), Ptr);
}
break;
case Intrinsic::ppc_qpx_qvstfs:
@@ -2499,22 +2520,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::x86_sse41_round_ps:
- case Intrinsic::x86_sse41_round_pd:
- case Intrinsic::x86_avx_round_ps_256:
- case Intrinsic::x86_avx_round_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_128:
- case Intrinsic::x86_avx512_mask_rndscale_ps_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_512:
- case Intrinsic::x86_avx512_mask_rndscale_pd_128:
- case Intrinsic::x86_avx512_mask_rndscale_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_pd_512:
- case Intrinsic::x86_avx512_mask_rndscale_ss:
- case Intrinsic::x86_avx512_mask_rndscale_sd:
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -2620,7 +2625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Arg1 = II->getArgOperand(1);
Value *V;
- switch (II->getIntrinsicID()) {
+ switch (IID) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_add_ps_512:
case Intrinsic::x86_avx512_add_pd_512:
@@ -2664,7 +2669,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
Value *V;
- switch (II->getIntrinsicID()) {
+ switch (IID) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
@@ -2706,44 +2711,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
}
}
- LLVM_FALLTHROUGH;
-
- // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
- case Intrinsic::x86_avx512_mask_max_ss_round:
- case Intrinsic::x86_avx512_mask_min_ss_round:
- case Intrinsic::x86_avx512_mask_max_sd_round:
- case Intrinsic::x86_avx512_mask_min_sd_round:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_min_ss:
- case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_min_sd:
- case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- }
- break;
- }
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- } else if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
break;
- }
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
@@ -2860,7 +2828,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
- if (Value *V = simplifyX86pack(*II, true))
+ if (Value *V = simplifyX86pack(*II, Builder, true))
return replaceInstUsesWith(*II, V);
break;
@@ -2870,7 +2838,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512:
- if (Value *V = simplifyX86pack(*II, false))
+ if (Value *V = simplifyX86pack(*II, Builder, false))
return replaceInstUsesWith(*II, V);
break;
@@ -3168,19 +3136,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return nullptr;
break;
- case Intrinsic::x86_xop_vpcomb:
- case Intrinsic::x86_xop_vpcomd:
- case Intrinsic::x86_xop_vpcomq:
- case Intrinsic::x86_xop_vpcomw:
- if (Value *V = simplifyX86vpcom(*II, Builder, true))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_xop_vpcomub:
- case Intrinsic::x86_xop_vpcomud:
- case Intrinsic::x86_xop_vpcomuq:
- case Intrinsic::x86_xop_vpcomuw:
- if (Value *V = simplifyX86vpcom(*II, Builder, false))
+ case Intrinsic::x86_addcarry_32:
+ case Intrinsic::x86_addcarry_64:
+ if (Value *V = simplifyX86addcarry(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -3296,8 +3254,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
// Check for constant LHS & RHS - in this case we just simplify.
- bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
- II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
+ bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
+ IID == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
@@ -3374,7 +3332,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APFloat Significand = frexp(C->getValueAPF(), Exp,
APFloat::rmNearestTiesToEven);
- if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
+ if (IID == Intrinsic::amdgcn_frexp_mant) {
return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
Significand));
}
@@ -3559,7 +3517,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
- bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
+ bool Signed = IID == Intrinsic::amdgcn_sbfe;
if (!CWidth || !COffset)
break;
@@ -3587,15 +3545,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::amdgcn_exp:
case Intrinsic::amdgcn_exp_compr: {
- ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!En) // Illegal.
- break;
-
+ ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
unsigned EnBits = En->getZExtValue();
if (EnBits == 0xf)
break; // All inputs enabled.
- bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
+ bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
bool Changed = false;
for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
@@ -3680,13 +3635,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::amdgcn_icmp:
case Intrinsic::amdgcn_fcmp: {
- const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
- if (!CC)
- break;
-
+ const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2));
// Guard against invalid arguments.
int64_t CCVal = CC->getZExtValue();
- bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
+ bool IsInteger = IID == Intrinsic::amdgcn_icmp;
if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
(!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
@@ -3709,7 +3661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// register (which contains the bitmask of live threads). So a
// comparison that always returns true is the same as a read of the
// EXEC register.
- Value *NewF = Intrinsic::getDeclaration(
+ Function *NewF = Intrinsic::getDeclaration(
II->getModule(), Intrinsic::read_register, II->getType());
Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
MDNode *MD = MDNode::get(II->getContext(), MDArgs);
@@ -3804,8 +3756,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
} else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
break;
- Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
- SrcLHS->getType());
+ Function *NewF =
+ Intrinsic::getDeclaration(II->getModule(), NewIID,
+ { II->getType(),
+ SrcLHS->getType() });
Value *Args[] = { SrcLHS, SrcRHS,
ConstantInt::get(CC->getType(), SrcPred) };
CallInst *NewCall = Builder.CreateCall(NewF, Args);
@@ -3833,11 +3787,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::amdgcn_update_dpp: {
Value *Old = II->getArgOperand(0);
- auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
- auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
- auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
- if (!BC || !RM || !BM ||
- BC->isZeroValue() ||
+ auto BC = cast<ConstantInt>(II->getArgOperand(5));
+ auto RM = cast<ConstantInt>(II->getArgOperand(3));
+ auto BM = cast<ConstantInt>(II->getArgOperand(4));
+ if (BC->isZeroValue() ||
RM->getZExtValue() != 0xF ||
BM->getZExtValue() != 0xF ||
isa<UndefValue>(Old))
@@ -3847,6 +3800,37 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->setOperand(0, UndefValue::get(Old->getType()));
return II;
}
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ // A constant value is trivially uniform.
+ if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
+ return replaceInstUsesWith(*II, C);
+
+ // The rest of these may not be safe if the exec may not be the same between
+ // the def and use.
+ Value *Src = II->getArgOperand(0);
+ Instruction *SrcInst = dyn_cast<Instruction>(Src);
+ if (SrcInst && SrcInst->getParent() != II->getParent())
+ break;
+
+ // readfirstlane (readfirstlane x) -> readfirstlane x
+ // readlane (readfirstlane x), y -> readfirstlane x
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+ return replaceInstUsesWith(*II, Src);
+
+ if (IID == Intrinsic::amdgcn_readfirstlane) {
+ // readfirstlane (readlane x, y) -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+ return replaceInstUsesWith(*II, Src);
+ } else {
+ // readlane (readlane x, y), y -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+ m_Value(), m_Specific(II->getArgOperand(1)))))
+ return replaceInstUsesWith(*II, Src);
+ }
+
+ break;
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -3870,14 +3854,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
// If there is a stackrestore below this one, remove this one.
- if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ if (II2->getIntrinsicID() == Intrinsic::stackrestore)
return eraseInstFromFunction(CI);
// Bail if we cross over an intrinsic with side effects, such as
// llvm.stacksave, llvm.read_register, or llvm.setjmp.
- if (II->mayHaveSideEffects()) {
+ if (II2->mayHaveSideEffects()) {
CannotRemove = true;
break;
}
@@ -3920,16 +3904,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
// the InstCombineIRInserter object.
- Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
+ FunctionType *AssumeIntrinsicTy = II->getFunctionType();
+ Value *AssumeIntrinsic = II->getCalledValue();
+ Value *A, *B;
if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
- Builder.CreateCall(AssumeIntrinsic, A, II->getName());
- Builder.CreateCall(AssumeIntrinsic, B, II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
- Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
- Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+ Builder.CreateNot(A), II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+ Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
}
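A sketch of the assume splitting described above (value names are illustrative):

    %c = and i1 %a, %b
    call void @llvm.assume(i1 %c)
      -->
    call void @llvm.assume(i1 %a)
    call void @llvm.assume(i1 %b)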
@@ -4036,7 +4024,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
}
- return visitCallSite(II);
+ return visitCallBase(*II);
}
// Fence instruction simplification
@@ -4051,12 +4039,17 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
// InvokeInst simplification
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
- return visitCallSite(&II);
+ return visitCallBase(II);
+}
+
+// CallBrInst simplification
+Instruction *InstCombiner::visitCallBrInst(CallBrInst &CBI) {
+ return visitCallBase(CBI);
}
/// If this cast does not affect the value passed through the varargs area, we
/// can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
+static bool isSafeToEliminateVarargsCast(const CallBase &Call,
const DataLayout &DL,
const CastInst *const CI,
const int ix) {
@@ -4068,18 +4061,20 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// TODO: This is probably something which should be expanded to all
// intrinsics since the entire point of intrinsics is that
// they are understandable by the optimizer.
- if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
+ if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call))
return false;
// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
- if (!CS.isByValOrInAllocaArgument(ix))
+ if (!Call.isByValOrInAllocaArgument(ix))
return true;
Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
- Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ Type *DstTy = Call.isByValArgument(ix)
+ ? Call.getParamByValType(ix)
+ : cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
@@ -4096,7 +4091,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+ LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
@@ -4182,10 +4177,10 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
-/// Improvements for call and invoke instructions.
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
- if (isAllocLikeFn(CS.getInstruction(), &TLI))
- return visitAllocSite(*CS.getInstruction());
+/// Improvements for call, callbr and invoke instructions.
+Instruction *InstCombiner::visitCallBase(CallBase &Call) {
+ if (isAllocLikeFn(&Call, &TLI))
+ return visitAllocSite(Call);
bool Changed = false;
@@ -4195,52 +4190,50 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
- for (Value *V : CS.args()) {
+ for (Value *V : Call.args()) {
if (V->getType()->isPointerTy() &&
- !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
- isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
+ !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
+ isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
ArgNos.push_back(ArgNo);
ArgNo++;
}
- assert(ArgNo == CS.arg_size() && "sanity check");
+ assert(ArgNo == Call.arg_size() && "sanity check");
if (!ArgNos.empty()) {
- AttributeList AS = CS.getAttributes();
- LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AttributeList AS = Call.getAttributes();
+ LLVMContext &Ctx = Call.getContext();
AS = AS.addParamAttribute(Ctx, ArgNos,
Attribute::get(Ctx, Attribute::NonNull));
- CS.setAttributes(AS);
+ Call.setAttributes(AS);
Changed = true;
}
// If the callee is a pointer to a function, attempt to move any casts to the
- // arguments of the call/invoke.
- Value *Callee = CS.getCalledValue();
- if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ // arguments of the call/callbr/invoke.
+ Value *Callee = Call.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
return nullptr;
if (Function *CalleeF = dyn_cast<Function>(Callee)) {
// Remove the convergent attr on calls when the callee is not convergent.
- if (CS.isConvergent() && !CalleeF->isConvergent() &&
+ if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
- LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
- << CS.getInstruction() << "\n");
- CS.setNotConvergent();
- return CS.getInstruction();
+ LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
+ << "\n");
+ Call.setNotConvergent();
+ return &Call;
}
// If the call and callee calling conventions don't match, this call must
// be unreachable, as the call is undefined.
- if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ if (CalleeF->getCallingConv() != Call.getCallingConv() &&
// Only do this for calls to a function with a body. A prototype may
// not actually end up matching the implementation's calling conv for a
// variety of reasons (e.g. it may be written in assembly).
!CalleeF->isDeclaration()) {
- Instruction *OldCall = CS.getInstruction();
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- OldCall);
+ Instruction *OldCall = &Call;
+ CreateNonTerminatorUnreachable(OldCall);
// If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!OldCall->getType()->isVoidTy())
@@ -4248,40 +4241,35 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (isa<CallInst>(OldCall))
return eraseInstFromFunction(*OldCall);
- // We cannot remove an invoke, because it would change the CFG, just
- // change the callee to a null pointer.
- cast<InvokeInst>(OldCall)->setCalledFunction(
- Constant::getNullValue(CalleeF->getType()));
+ // We cannot remove an invoke or a callbr, because it would change the
+ // CFG, just change the callee to a null pointer.
+ cast<CallBase>(OldCall)->setCalledFunction(
+ CalleeF->getFunctionType(),
+ Constant::getNullValue(CalleeF->getType()));
return nullptr;
}
}
if ((isa<ConstantPointerNull>(Callee) &&
- !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
+ !NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
- // If CS does not return void then replaceAllUsesWith undef.
+ // If Call does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!CS.getInstruction()->getType()->isVoidTy())
- replaceInstUsesWith(*CS.getInstruction(),
- UndefValue::get(CS.getInstruction()->getType()));
+ if (!Call.getType()->isVoidTy())
+ replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
- if (isa<InvokeInst>(CS.getInstruction())) {
- // Can't remove an invoke because we cannot change the CFG.
+ if (Call.isTerminator()) {
+ // Can't remove an invoke or callbr because we cannot change the CFG.
return nullptr;
}
- // This instruction is not reachable, just remove it. We insert a store to
- // undef so that we know that this code is not reachable, despite the fact
- // that we can't modify the CFG here.
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- CS.getInstruction());
-
- return eraseInstFromFunction(*CS.getInstruction());
+ // This instruction is not reachable, just remove it.
+ CreateNonTerminatorUnreachable(&Call);
+ return eraseInstFromFunction(Call);
}
if (IntrinsicInst *II = findInitTrampoline(Callee))
- return transformCallThroughTrampoline(CS, II);
+ return transformCallThroughTrampoline(Call, *II);
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -4289,39 +4277,48 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
- for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
- E = CS.arg_end(); I != E; ++I, ++ix) {
+ for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
+ I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
*I = CI->getOperand(0);
+
+ // Update the byval type to match the argument type.
+ if (Call.isByValArgument(ix)) {
+ Call.removeParamAttr(ix, Attribute::ByVal);
+ Call.addParamAttr(
+ ix, Attribute::getWithByValType(
+ Call.getContext(),
+ CI->getOperand(0)->getType()->getPointerElementType()));
+ }
Changed = true;
}
}
}
- if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
// Inline asm calls cannot throw - mark them 'nounwind'.
- CS.setDoesNotThrow();
+ Call.setDoesNotThrow();
Changed = true;
}
// Try to optimize the call if possible, we require DataLayout for most of
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
- if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return eraseInstFromFunction(*I);
}
- return Changed ? CS.getInstruction() : nullptr;
+ return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
-/// the arguments of the call/invoke.
-bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+/// the arguments of the call/callbr/invoke.
+bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
+ auto *Callee = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
@@ -4335,11 +4332,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
// TODO: Do the transform if it only requires adding pointer casts.
- if (CS.isMustTailCall())
+ if (Call.isMustTailCall())
return false;
- Instruction *Caller = CS.getInstruction();
- const AttributeList &CallerPAL = CS.getAttributes();
+ Instruction *Caller = &Call;
+ const AttributeList &CallerPAL = Call.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
@@ -4370,20 +4367,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Attribute not compatible with transformed value.
}
- // If the callsite is an invoke instruction, and the return value is used by
- // a PHI node in a successor, we cannot change the return type of the call
- // because there is no place to put the cast instruction (without breaking
- // the critical edge). Bail out in this case.
- if (!Caller->use_empty())
+ // If the callbase is an invoke/callbr instruction, and the return value is
+ // used by a PHI node in a successor, we cannot change the return type of
+ // the call because there is no place to put the cast instruction (without
+ // breaking the critical edge). Bail out in this case.
+ if (!Caller->use_empty()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
for (User *U : II->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
if (PN->getParent() == II->getNormalDest() ||
PN->getParent() == II->getUnwindDest())
return false;
+ // FIXME: Be conservative for callbr to avoid a quadratic search.
+ if (isa<CallBrInst>(Caller))
+ return false;
+ }
}
- unsigned NumActualArgs = CS.arg_size();
+ unsigned NumActualArgs = Call.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
// Prevent us turning:
@@ -4398,7 +4399,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
- CallSite::arg_iterator AI = CS.arg_begin();
+ auto AI = Call.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
@@ -4410,7 +4411,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
.overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
- if (CS.isInAllocaArgument(i))
+ if (Call.isInAllocaArgument(i))
return false; // Cannot transform to and from inalloca.
// If the parameter is passed as a byval argument, then we have to have a
@@ -4420,7 +4421,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
- Type *CurElTy = ActTy->getPointerElementType();
+ Type *CurElTy = Call.getParamByValType(i);
if (DL.getTypeAllocSize(CurElTy) !=
DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
@@ -4435,7 +4436,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the callee is just a declaration, don't change the varargsness of the
// call. We don't want to introduce a varargs call where one doesn't
// already exist.
- PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ PointerType *APTy = cast<PointerType>(Call.getCalledValue()->getType());
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
return false;
@@ -4474,7 +4475,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// with the existing attributes. Wipe out any problematic attributes.
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
- AI = CS.arg_begin();
+ LLVMContext &Ctx = Call.getContext();
+ AI = Call.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
@@ -4484,7 +4486,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Args.push_back(NewArg);
// Add any parameter attributes.
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+ AttrBuilder AB(CallerPAL.getParamAttributes(i));
+ AB.addByValAttr(NewArg->getType()->getPointerElementType());
+ ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
+ } else
+ ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
}
// If the function takes more arguments than the call was taking, add them
@@ -4523,45 +4530,50 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
"missing argument attributes");
- LLVMContext &Ctx = Callee->getContext();
AttributeList NewCallerPAL = AttributeList::get(
Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ Call.getOperandBundlesAsDefs(OpBundles);
- CallSite NewCS;
+ CallBase *NewCall;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args, OpBundles);
+ NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args, OpBundles);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+ NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
+ CBI->getIndirectDests(), Args, OpBundles);
} else {
- NewCS = Builder.CreateCall(Callee, Args, OpBundles);
- cast<CallInst>(NewCS.getInstruction())
- ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
+ NewCall = Builder.CreateCall(Callee, Args, OpBundles);
+ cast<CallInst>(NewCall)->setTailCallKind(
+ cast<CallInst>(Caller)->getTailCallKind());
}
- NewCS->takeName(Caller);
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(NewCallerPAL);
+ NewCall->takeName(Caller);
+ NewCall->setCallingConv(Call.getCallingConv());
+ NewCall->setAttributes(NewCallerPAL);
// Preserve the weight metadata for the new call instruction. The metadata
// is used by SamplePGO to check callsite's hotness.
uint64_t W;
if (Caller->extractProfTotalWeight(W))
- NewCS->setProfWeight(W);
+ NewCall->setProfWeight(W);
// Insert a cast of the return type as necessary.
- Instruction *NC = NewCS.getInstruction();
+ Instruction *NC = NewCall;
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
- // If this is an invoke instruction, we should insert it after the first
- // non-phi, instruction in the normal successor block.
+ // If this is an invoke/callbr instruction, we should insert it after the
+ // first non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+ BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
+ InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
@@ -4590,23 +4602,20 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
-InstCombiner::transformCallThroughTrampoline(CallSite CS,
- IntrinsicInst *Tramp) {
- Value *Callee = CS.getCalledValue();
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- AttributeList Attrs = CS.getAttributes();
+InstCombiner::transformCallThroughTrampoline(CallBase &Call,
+ IntrinsicInst &Tramp) {
+ Value *Callee = Call.getCalledValue();
+ Type *CalleeTy = Callee->getType();
+ FunctionType *FTy = Call.getFunctionType();
+ AttributeList Attrs = Call.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return nullptr;
- assert(Tramp &&
- "transformCallThroughTrampoline called with incorrect CallSite.");
-
- Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
- FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
+ Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
+ FunctionType *NestFTy = NestF->getFunctionType();
AttributeList NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
@@ -4628,22 +4637,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
}
if (NestTy) {
- Instruction *Caller = CS.getInstruction();
std::vector<Value*> NewArgs;
std::vector<AttributeSet> NewArgAttrs;
- NewArgs.reserve(CS.arg_size() + 1);
- NewArgAttrs.reserve(CS.arg_size());
+ NewArgs.reserve(Call.arg_size() + 1);
+ NewArgAttrs.reserve(Call.arg_size());
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
{
unsigned ArgNo = 0;
- CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ auto I = Call.arg_begin(), E = Call.arg_end();
do {
if (ArgNo == NestArgNo) {
// Add the chain argument and attributes.
- Value *NestVal = Tramp->getArgOperand(2);
+ Value *NestVal = Tramp.getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
@@ -4705,24 +4713,30 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
Attrs.getRetAttributes(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ Call.getOperandBundlesAsDefs(OpBundles);
Instruction *NewCaller;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCaller = InvokeInst::Create(NewCallee,
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
+ NewCaller = InvokeInst::Create(NewFTy, NewCallee,
II->getNormalDest(), II->getUnwindDest(),
NewArgs, OpBundles);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
+ NewCaller =
+ CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
+ CBI->getIndirectDests(), NewArgs, OpBundles);
+ cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
+ cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
} else {
- NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
+ NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
cast<CallInst>(NewCaller)->setTailCallKind(
- cast<CallInst>(Caller)->getTailCallKind());
+ cast<CallInst>(Call).getTailCallKind());
cast<CallInst>(NewCaller)->setCallingConv(
- cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(Call).getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
- NewCaller->setDebugLoc(Caller->getDebugLoc());
+ NewCaller->setDebugLoc(Call.getDebugLoc());
return NewCaller;
}
@@ -4731,9 +4745,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
- Constant *NewCallee =
- NestF->getType() == PTy ? NestF :
- ConstantExpr::getBitCast(NestF, PTy);
- CS.setCalledFunction(NewCallee);
- return CS.getInstruction();
+ Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
+ Call.setCalledFunction(FTy, NewCallee);
+ return &Call;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 1201ac196ec0..2c9ba203fbf3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1,9 +1,8 @@
//===- InstCombineCasts.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1373,10 +1372,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If we know that the value being extended is positive, we can use a zext
// instead.
KnownBits Known = computeKnownBits(Src, 0, &CI);
- if (Known.isNonNegative()) {
- Value *ZExt = Builder.CreateZExt(Src, DestTy);
- return replaceInstUsesWith(CI, ZExt);
- }
+ if (Known.isNonNegative())
+ return CastInst::Create(Instruction::ZExt, Src, DestTy);
// Try to extend the entire expression tree to the wide destination type.
if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
@@ -1618,12 +1615,20 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
return CastInst::CreateFPCast(ExactResult, Ty);
}
}
+ }
- // (fptrunc (fneg x)) -> (fneg (fptrunc x))
- Value *X;
- if (match(OpI, m_FNeg(m_Value(X)))) {
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ Value *X;
+ Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
+ if (Op && Op->hasOneUse()) {
+ if (match(Op, m_FNeg(m_Value(X)))) {
Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
- return BinaryOperator::CreateFNegFMF(InnerTrunc, OpI);
+
+ // FIXME: Once we're sure that unary FNeg optimizations are on par with
+ // binary FNeg, this should always return a unary operator.
+ if (isa<BinaryOperator>(Op))
+ return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
+ return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
}
}
@@ -1657,8 +1662,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
II->getIntrinsicID(), Ty);
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCI = CallInst::Create(Overload, { InnerTrunc }, OpBundles,
- II->getName());
+ CallInst *NewCI =
+ CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
NewCI->copyFastMathFlags(II);
return NewCI;
}
@@ -2167,7 +2172,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
SmallSetVector<PHINode *, 4> OldPhiNodes;
// Find all of the A->B casts and PHI nodes.
- // We need to inpect all related PHI nodes, but PHIs can be cyclic, so
+ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
// OldPhiNodes is used to track all known PHI nodes, before adding a new
// PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
PhiWorklist.push_back(PN);
@@ -2242,20 +2247,43 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
}
}
+ // Traverse all accumulated PHI nodes and process their users,
+ // which are Stores and BitCasts. Without this processing,
+ // NewPHI nodes could be replicated and could lead to extra
+ // moves generated after DeSSA.
// If there is a store with type B, change it to type A.
- for (User *U : PN->users()) {
- auto *SI = dyn_cast<StoreInst>(U);
- if (SI && SI->isSimple() && SI->getOperand(0) == PN) {
- Builder.SetInsertPoint(SI);
- auto *NewBC =
- cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy));
- SI->setOperand(0, NewBC);
- Worklist.Add(SI);
- assert(hasStoreUsersOnly(*NewBC));
+
+
+ // Replace users of BitCast B->A with NewPHI. These will help
+ // later to get rid of a closure formed by OldPHI nodes.
+ Instruction *RetVal = nullptr;
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (User *V : OldPN->users()) {
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ if (SI->isSimple() && SI->getOperand(0) == OldPN) {
+ Builder.SetInsertPoint(SI);
+ auto *NewBC =
+ cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+ SI->setOperand(0, NewBC);
+ Worklist.Add(SI);
+ assert(hasStoreUsersOnly(*NewBC));
+ }
+ }
+ else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ // Verify it's a B->A cast.
+ Type *TyB = BCI->getOperand(0)->getType();
+ Type *TyA = BCI->getType();
+ if (TyA == DestTy && TyB == SrcTy) {
+ Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+ if (BCI == &CI)
+ RetVal = I;
+ }
+ }
}
}
- return replaceInstUsesWith(CI, NewPNodes[PN]);
+ return RetVal;
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -2310,7 +2338,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
- return GetElementPtrInst::CreateInBounds(Src, Idxs);
+ return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src,
+ Idxs);
}
}
@@ -2355,11 +2384,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
// Otherwise, see if our source is an insert. If so, then use the scalar
- // component directly.
- if (InsertElementInst *IEI =
- dyn_cast<InsertElementInst>(CI.getOperand(0)))
- return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
- DestTy);
+ // component directly:
+ // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
+ if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
+ return new BitCastInst(InsElt->getOperand(1), DestTy);
}
}
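Illustrative IR for the insertelement fold above (invented identifiers):

    define <4 x i8> @bc_inselt(i32 %x) {
      %v = insertelement <1 x i32> undef, i32 %x, i32 0
      %b = bitcast <1 x i32> %v to <4 x i8>
      ret <4 x i8> %b
    }
    ; expected: %b = bitcast i32 %x to <4 x i8>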
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b5bbb09935e2..3a4283ae5406 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1,9 +1,8 @@
//===- InstCombineCompares.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -704,7 +703,10 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
continue;
if (auto *CI = dyn_cast<CastInst>(Val)) {
- NewInsts[CI] = NewInsts[CI->getOperand(0)];
+ // Don't get rid of the intermediate variable here; the store can grow
+ // the map, which will invalidate the reference to the input value.
+ Value *V = NewInsts[CI->getOperand(0)];
+ NewInsts[CI] = V;
continue;
}
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
@@ -1292,8 +1294,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
- Value *F = Intrinsic::getDeclaration(I.getModule(),
- Intrinsic::sadd_with_overflow, NewType);
+ Function *F = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::sadd_with_overflow, NewType);
InstCombiner::BuilderTy &Builder = IC.Builder;
@@ -1315,14 +1317,16 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+// Handle icmp pred X, 0
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
+ if (!match(Cmp.getOperand(1), m_Zero()))
+ return nullptr;
- if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
+ // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+ if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
- SelectPatternResult SPR = matchSelectPattern(X, A, B);
+ SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
if (SPR.Flavor == SPF_SMIN) {
if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
@@ -1330,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
+
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
+ Value *X, *Y;
+ if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
+ ICmpInst::isEquality(Pred)) {
+ KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+ KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+ }
+
return nullptr;
}
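Illustrative IR for the new urem fold (invented identifiers; the known-bits facts follow from the masks):

    define i1 @urem_eq0(i8 %a, i8 %b) {
      %x = and i8 %a, 16    ; at most one bit set
      %y = or i8 %b, 3      ; at least two bits set
      %r = urem i8 %x, %y
      %c = icmp eq i8 %r, 0
      ret i1 %c
    }
    ; expected: %c = icmp eq i8 %x, 0 (the urem is dropped)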
@@ -1624,20 +1642,43 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
+ bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
+
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
// TODO: We canonicalize to the longer form for scalars because we have
// better analysis/folds for icmp, and codegen may be better with icmp.
- if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
- C1.isNullValue() && match(And->getOperand(1), m_One()))
+ if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+ match(And->getOperand(1), m_One()))
return new TruncInst(And->getOperand(0), Cmp.getType());
const APInt *C2;
- if (!match(And->getOperand(1), m_APInt(C2)))
+ Value *X;
+ if (!match(And, m_And(m_Value(X), m_APInt(C2))))
return nullptr;
+ // Don't perform the following transforms if the AND has multiple uses
if (!And->hasOneUse())
return nullptr;
+ if (Cmp.isEquality() && C1.isNullValue()) {
+ // Restrict this fold to single-use 'and' (PR10267).
+ // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
+ if (C2->isSignMask()) {
+ Constant *Zero = Constant::getNullValue(X->getType());
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(NewPred, X, Zero);
+ }
+
+ // Restrict this fold to single-use 'and' (PR10267).
+ // ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two.
+ if ((~(*C2) + 1).isPowerOf2()) {
+ Constant *NegBOC =
+ ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(NewPred, X, NegBOC);
+ }
+ }
+
// If the LHS is an 'and' of a truncate and we can widen the and/compare to
// the input width without changing the value produced, eliminate the cast:
//
@@ -1772,13 +1813,22 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
ConstantInt::get(V->getType(), 1));
}
- // X | C == C --> X <=u C
- // X | C != C --> X >u C
- // iff C+1 is a power of 2 (C is a bitmask of the low bits)
- if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
- (C + 1).isPowerOf2()) {
- Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
- return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
+ Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
+ if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) {
+ // X | C == C --> X <=u C
+ // X | C != C --> X >u C
+ // iff C+1 is a power of 2 (C is a bitmask of the low bits)
+ if ((C + 1).isPowerOf2()) {
+ Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ return new ICmpInst(Pred, OrOp0, OrOp1);
+ }
+ // More general: are all bits outside of a mask constant set or not set?
+ // X | C == C --> (X & ~C) == 0
+ // X | C != C --> (X & ~C) != 0
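+ // e.g. for i8: (X | 12) == 12 --> (X & 0xF3) == 0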
+ if (Or->hasOneUse()) {
+ Value *A = Builder.CreateAnd(OrOp0, ~C);
+ return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType()));
+ }
}
if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
@@ -1799,8 +1849,8 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
// Are we using xors to bitwise check for a pair of (in)equalities? Convert to
// a shorter form that has more potential to be folded even further.
Value *X1, *X2, *X3, *X4;
- if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
- match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
+ if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
+ match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
// ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
// ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
@@ -1994,6 +2044,27 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
And, Constant::getNullValue(ShType));
}
+ // Simplify 'shl' inequality test into 'and' equality test.
+ if (Cmp.isUnsigned() && Shl->hasOneUse()) {
+ // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
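+ // e.g. for i8: (X l<< 2) u<= 31 --> (X & 0x38) == 0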
+ if ((C + 1).isPowerOf2() &&
+ (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
+ Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
+ return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(ShType));
+ }
+ // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
+ if (C.isPowerOf2() &&
+ (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
+ Value *And =
+ Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
+ return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(ShType));
+ }
+ }
+
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
@@ -2313,6 +2384,16 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
const APInt &C) {
Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
ICmpInst::Predicate Pred = Cmp.getPredicate();
+ const APInt *C2;
+ APInt SubResult;
+
+ // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
+ if (match(X, m_APInt(C2)) &&
+ ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
+ (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
+ !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
+ return new ICmpInst(Cmp.getSwappedPredicate(), Y,
+ ConstantInt::get(Y->getType(), SubResult));
// The following transforms are only worth it if the only user of the subtract
// is the icmp.
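Illustrative IR for the new sub-with-constant fold (invented identifiers); with 'nuw' on the subtract, the comparison can be rewritten directly against %y:

    define i1 @sub_cmp(i8 %y) {
      %s = sub nuw i8 100, %y
      %c = icmp ugt i8 %s, 60
      ret i1 %c
    }
    ; expected: %c = icmp ult i8 %y, 40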
@@ -2337,7 +2418,6 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
}
- const APInt *C2;
if (!match(X, m_APInt(C2)))
return nullptr;
@@ -2482,20 +2562,76 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
// the entire original Cmp can be simplified to a false.
Value *Cond = Builder.getFalse();
if (TrueWhenLessThan)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT,
+ OrigLHS, OrigRHS));
if (TrueWhenEqual)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ,
+ OrigLHS, OrigRHS));
if (TrueWhenGreaterThan)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT,
+ OrigLHS, OrigRHS));
return replaceInstUsesWith(Cmp, Cond);
}
return nullptr;
}
-Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
- BitCastInst *Bitcast,
- const APInt &C) {
+static Instruction *foldICmpBitCast(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
+ if (!Bitcast)
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op1 = Cmp.getOperand(1);
+ Value *BCSrcOp = Bitcast->getOperand(0);
+
+ // Make sure the bitcast doesn't change the number of vector elements.
+ if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
+ Bitcast->getDestTy()->getScalarSizeInBits()) {
+ // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
+ Value *X;
+ if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
+ // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
+ // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
+ // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
+ // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
+ match(Op1, m_Zero()))
+ return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+
+ // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
+
+ // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
+ if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
+ return new ICmpInst(Pred, X,
+ ConstantInt::getAllOnesValue(X->getType()));
+ }
+
+ // Zero-equality checks are preserved through unsigned floating-point casts:
+ // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
+ // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
+ if (match(BCSrcOp, m_UIToFP(m_Value(X))))
+ if (Cmp.isEquality() && match(Op1, m_Zero()))
+ return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+ }
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, do so.
+ if (Bitcast->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = BC2->getOperand(0);
+
+ Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
+ return new ICmpInst(Pred, BCSrcOp, Op1);
+ }
+
// Folding: icmp <pred> iN X, C
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
// and C is a splat of a K-bit pattern
@@ -2503,28 +2639,28 @@ Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
// Into:
// %E = extractelement <M x iK> %vec, i32 C'
// icmp <pred> iK %E, trunc(C)
- if (!Bitcast->getType()->isIntegerTy() ||
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+ !Bitcast->getType()->isIntegerTy() ||
!Bitcast->getSrcTy()->isIntOrIntVectorTy())
return nullptr;
- Value *BCIOp = Bitcast->getOperand(0);
- Value *Vec = nullptr; // 1st vector arg of the shufflevector
- Constant *Mask = nullptr; // Mask arg of the shufflevector
- if (match(BCIOp,
+ Value *Vec;
+ Constant *Mask;
+ if (match(BCSrcOp,
m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
// Check whether every element of Mask is the same constant
if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
- auto *VecTy = cast<VectorType>(BCIOp->getType());
+ auto *VecTy = cast<VectorType>(BCSrcOp->getType());
auto *EltTy = cast<IntegerType>(VecTy->getElementType());
- auto Pred = Cmp.getPredicate();
- if (C.isSplat(EltTy->getBitWidth())) {
+ if (C->isSplat(EltTy->getBitWidth())) {
// Fold the icmp based on the value of C
// If C is M copies of an iK sized bit pattern,
// then:
// => %E = extractelement <N x iK> %vec, i32 Elem
// icmp <pred> iK %SplatVal, <pattern>
Value *Extract = Builder.CreateExtractElement(Vec, Elem);
- Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
+ Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth()));
return new ICmpInst(Pred, Extract, NewC);
}
}
@@ -2606,13 +2742,9 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
return I;
}
- if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
- if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
return I;
- }
-
- if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
- return I;
return nullptr;
}
@@ -2711,24 +2843,6 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
if (C == *BOC && C.isPowerOf2())
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
BO, Constant::getNullValue(RHS->getType()));
-
- // Don't perform the following transforms if the AND has multiple uses
- if (!BO->hasOneUse())
- break;
-
- // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->isSignMask()) {
- Constant *Zero = Constant::getNullValue(BOp0->getType());
- auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
- return new ICmpInst(NewPred, BOp0, Zero);
- }
-
- // ((X & ~7) == 0) --> X < 8
- if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
- Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
- auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
- return new ICmpInst(NewPred, BOp0, NegBOC);
- }
}
break;
}
@@ -2756,14 +2870,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
return nullptr;
}
-/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
- const APInt &C) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
- if (!II || !Cmp.isEquality())
- return nullptr;
-
- // Handle icmp {eq|ne} <intrinsic>, Constant.
+/// Fold an equality icmp with LLVM intrinsic and constant operand.
+Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
@@ -2823,6 +2933,65 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return nullptr;
}
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
+ if (Cmp.isEquality())
+ return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
+
+ Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ctlz: {
+ // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+
+ // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+ break;
+ }
+ case Intrinsic::cttz: {
+ // Limit to one use to ensure we don't increase instruction count.
+ if (!II->hasOneUse())
+ return nullptr;
+
+ // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+
+ // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
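Illustrative IR for the non-equality ctlz case (the cttz case is analogous; invented identifiers):

    declare i8 @llvm.ctlz.i8(i8, i1)

    define i1 @ctlz_ugt(i8 %x) {
      %z = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
      %c = icmp ugt i8 %z, 3
      ret i1 %c
    }
    ; expected: %c = icmp ult i8 %x, 16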
/// Handle icmp with constant (but not simple integer constant) RHS.
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2983,6 +3152,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// x s> x & (-1 >> y) -> x s> (-1 >> y)
if (X != I.getOperand(0)) // X must be on LHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Cannot do this fold with a non-constant.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SGT;
break;
case ICmpInst::Predicate::ICMP_SGE:
@@ -3009,6 +3182,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// x s<= x & (-1 >> y) -> x s<= (-1 >> y)
if (X != I.getOperand(0)) // X must be on LHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Cannot do this fold with a non-constant.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SLE;
break;
default:
@@ -3093,6 +3270,64 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
return T1;
}
+// Given pattern:
+// icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// we should move shifts to the same hand of 'and', i.e. rewrite as
+// icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
+// We are only interested in opposite logical shifts here.
+// If we can, we want to end up creating 'lshr' shift.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+ InstCombiner::BuilderTy &Builder) {
+ if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+ !I.getOperand(0)->hasOneUse())
+ return nullptr;
+
+ auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+ auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+ // Look for an 'and' of two (opposite) logical shifts.
+ // Pick the single-use shift as XShift.
+ Value *XShift, *YShift;
+ if (!match(I.getOperand(0),
+ m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+ m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+ return nullptr;
+
+ // If YShift is a single-use 'lshr', swap the shifts around.
+ if (match(YShift, m_OneUse(m_AnyLShr)))
+ std::swap(XShift, YShift);
+
+ // The shifts must be in opposite directions.
+ Instruction::BinaryOps XShiftOpcode =
+ cast<BinaryOperator>(XShift)->getOpcode();
+ if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+ return nullptr; // Do not care about same-direction shifts here.
+
+ Value *X, *XShAmt, *Y, *YShAmt;
+ match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+ match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+ // Can we fold (XShAmt+YShAmt) ?
+ Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+ SQ.getWithInstruction(&I));
+ if (!NewShAmt)
+ return nullptr;
+ // Is the new shift amount smaller than the bit width?
+ // FIXME: could also rely on ConstantRange.
+ unsigned BitWidth = X->getType()->getScalarSizeInBits();
+ if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(BitWidth, BitWidth))))
+ return nullptr;
+ // All good, we can do this fold. The new shift has the same direction as X's shift.
+ Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+ ? Builder.CreateLShr(X, NewShAmt)
+ : Builder.CreateShl(X, NewShAmt);
+ Value *T1 = Builder.CreateAnd(T0, Y);
+ return Builder.CreateICmp(I.getPredicate(), T1,
+ Constant::getNullValue(X->getType()));
+}
+
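Illustrative IR for the shift-merging fold above (invented identifiers). Both shifts are single-use here, so the 'lshr' form is kept:

    define i1 @merge_shifts(i8 %x, i8 %y) {
      %xs = lshr i8 %x, 1
      %ys = shl i8 %y, 2
      %a = and i8 %xs, %ys
      %c = icmp eq i8 %a, 0
      ret i1 %c
    }
    ; expected, roughly:
    ;   %t0 = lshr i8 %x, 3
    ;   %t1 = and i8 %t0, %y
    ;   %c  = icmp eq i8 %t1, 0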
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3448,6 +3683,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
return replaceInstUsesWith(I, V);
+ if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+ return replaceInstUsesWith(I, V);
+
return nullptr;
}
@@ -3688,6 +3926,30 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
match(Op1, m_BitReverse(m_Value(B)))))
return new ICmpInst(Pred, A, B);
+ // Canonicalize checking for a power-of-2-or-zero value:
+ // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
+ // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
+ if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
+ m_Deferred(A)))) ||
+ !match(Op1, m_ZeroInt()))
+ A = nullptr;
+
+ // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
+ // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
+ if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
+ A = Op1;
+ else if (match(Op1,
+ m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
+ A = Op0;
+
+ if (A) {
+ Type *Ty = A->getType();
+ CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
+ return Pred == ICmpInst::ICMP_EQ
+ ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2))
+ : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
+ }
+
return nullptr;
}
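Illustrative IR for the new power-of-2-or-zero canonicalization (invented identifiers):

    declare i8 @llvm.ctpop.i8(i8)

    define i1 @pow2_or_zero(i8 %a) {
      %m = add i8 %a, -1
      %t = and i8 %m, %a
      %c = icmp eq i8 %t, 0
      ret i1 %c
    }
    ; expected:
    ;   %p = call i8 @llvm.ctpop.i8(i8 %a)
    ;   %c = icmp ult i8 %p, 2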
@@ -3698,7 +3960,6 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
Value *LHSCIOp = LHSCI->getOperand(0);
Type *SrcTy = LHSCIOp->getType();
Type *DestTy = LHSCI->getType();
- Value *RHSCIOp;
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
@@ -3740,7 +4001,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
// Not an extension from the same type?
- RHSCIOp = CI->getOperand(0);
+ Value *RHSCIOp = CI->getOperand(0);
if (RHSCIOp->getType() != LHSCIOp->getType())
return nullptr;
@@ -3813,104 +4074,83 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
return BinaryOperator::CreateNot(Result);
}
-bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
- Value *RHS, Instruction &OrigI,
- Value *&Result, Constant *&Overflow) {
+static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
+ switch (BinaryOp) {
+ default:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
+ case Instruction::Sub:
+ return match(RHS, m_Zero());
+ case Instruction::Mul:
+ return match(RHS, m_One());
+ }
+}
+
+OverflowResult InstCombiner::computeOverflow(
+ Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS, Instruction *CxtI) const {
+ switch (BinaryOp) {
+ default:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
+ if (IsSigned)
+ return computeOverflowForSignedAdd(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
+ case Instruction::Sub:
+ if (IsSigned)
+ return computeOverflowForSignedSub(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedSub(LHS, RHS, CxtI);
+ case Instruction::Mul:
+ if (IsSigned)
+ return computeOverflowForSignedMul(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedMul(LHS, RHS, CxtI);
+ }
+}
+
+bool InstCombiner::OptimizeOverflowCheck(
+ Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
+ Instruction &OrigI, Value *&Result, Constant *&Overflow) {
if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
- auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
- Result = OpResult;
- Overflow = OverflowVal;
- if (ReuseName)
- Result->takeName(&OrigI);
- return true;
- };
-
// If the overflow check was an add followed by a compare, the insertion point
// may be pointing to the compare. We want to insert the new instructions
// before the add in case there are uses of the add between the add and the
// compare.
Builder.SetInsertPoint(&OrigI);
- switch (OCF) {
- case OCF_INVALID:
- llvm_unreachable("bad overflow check kind!");
-
- case OCF_UNSIGNED_ADD: {
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
- if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
- true);
-
- if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
-
- // Fall through uadd into sadd
- LLVM_FALLTHROUGH;
- }
- case OCF_SIGNED_ADD: {
- // X + 0 -> {X, false}
- if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder.getFalse(), false);
-
- // We can strength reduce this signed add into a regular add if we can prove
- // that it will never overflow.
- if (OCF == OCF_SIGNED_ADD)
- if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
- true);
- break;
- }
-
- case OCF_UNSIGNED_SUB:
- case OCF_SIGNED_SUB: {
- // X - 0 -> {X, false}
- if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder.getFalse(), false);
-
- if (OCF == OCF_SIGNED_SUB) {
- if (willNotOverflowSignedSub(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
- true);
- } else {
- if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
- true);
- }
- break;
- }
-
- case OCF_UNSIGNED_MUL: {
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
- if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
- true);
- if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
- LLVM_FALLTHROUGH;
+ if (isNeutralValue(BinaryOp, RHS)) {
+ Result = LHS;
+ Overflow = Builder.getFalse();
+ return true;
}
- case OCF_SIGNED_MUL:
- // X * undef -> undef
- if (isa<UndefValue>(RHS))
- return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
-
- // X * 0 -> {0, false}
- if (match(RHS, m_Zero()))
- return SetResult(RHS, Builder.getFalse(), false);
-
- // X * 1 -> {X, false}
- if (match(RHS, m_One()))
- return SetResult(LHS, Builder.getFalse(), false);
- if (OCF == OCF_SIGNED_MUL)
- if (willNotOverflowSignedMul(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
- true);
- break;
+ switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
+ case OverflowResult::MayOverflow:
+ return false;
+ case OverflowResult::AlwaysOverflowsLow:
+ case OverflowResult::AlwaysOverflowsHigh:
+ Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+ Result->takeName(&OrigI);
+ Overflow = Builder.getTrue();
+ return true;
+ case OverflowResult::NeverOverflows:
+ Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+ Result->takeName(&OrigI);
+ Overflow = Builder.getFalse();
+ if (auto *Inst = dyn_cast<Instruction>(Result)) {
+ if (IsSigned)
+ Inst->setHasNoSignedWrap();
+ else
+ Inst->setHasNoUnsignedWrap();
+ }
+ return true;
}
- return false;
+ llvm_unreachable("Unexpected overflow result");
}
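A rough sketch of the observable effect (invented identifiers), assuming the path where the with.overflow intrinsic handling elsewhere in InstCombine reaches OptimizeOverflowCheck: when the operation provably cannot wrap, the intrinsic becomes a flagged binary op plus a constant-false overflow bit.

    declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8)

    define { i8, i1 } @no_overflow(i8 %a) {
      %x = and i8 %a, 15    ; %x u< 16, so %x + 1 cannot wrap in i8
      %r = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 1)
      ret { i8, i1 } %r
    }
    ; expected: %add = add nuw i8 %x, 1, with an overflow bit of 'false'
    ; packed back into the { i8, i1 } result by the caller.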
/// Recognize and process idiom involving test for multiplication
@@ -4084,8 +4324,8 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulA = Builder.CreateZExt(A, MulType);
if (WidthB < MulWidth)
MulB = Builder.CreateZExt(B, MulType);
- Value *F = Intrinsic::getDeclaration(I.getModule(),
- Intrinsic::umul_with_overflow, MulType);
+ Function *F = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::umul_with_overflow, MulType);
CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
@@ -4881,61 +5121,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return New;
}
- // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
- Value *X;
- if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
- // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
- // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
- // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
- // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
- Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
- match(Op1, m_Zero()))
- return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
- // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
- if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
- return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
-
- // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
- if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
- return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
- }
-
- // Zero-equality checks are preserved through unsigned floating-point casts:
- // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
- // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
- if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
- if (I.isEquality() && match(Op1, m_Zero()))
- return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
- // Test to see if the operands of the icmp are casted versions of other
- // values. If the ptr->ptr cast can be stripped off both arguments, we do so
- // now.
- if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (Op0->getType()->isPointerTy() &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
- // We keep moving the cast from the left operand over to the right
- // operand, where it can often be eliminated completely.
- Op0 = CI->getOperand(0);
-
- // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
- // so eliminate it as well.
- if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
- Op1 = CI2->getOperand(0);
-
- // If Op1 is a constant, we can fold the cast into the constant.
- if (Op0->getType() != Op1->getType()) {
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
- } else {
- // Otherwise, cast the RHS right before the icmp
- Op1 = Builder.CreateBitCast(Op1, Op0->getType());
- }
- }
- return new ICmpInst(I.getPredicate(), Op0, Op1);
- }
- }
+ if (Instruction *Res = foldICmpBitCast(I, Builder))
+ return Res;
if (isa<CastInst>(Op0)) {
// Handle the special case of: icmp (cast bool to X), <cst>
@@ -4984,8 +5171,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
isa<IntegerType>(A->getType())) {
Value *Result;
Constant *Overflow;
- if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
- Overflow)) {
+ if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
+ *AddI, Result, Overflow)) {
replaceInstUsesWith(*AddI, Result);
return replaceInstUsesWith(I, Overflow);
}
@@ -5411,6 +5598,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return replaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
+ Type *OpType = Op0->getType();
+ assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
if (Op0 == Op1) {
switch (Pred) {
default: break;
@@ -5420,7 +5609,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UNE: // True if unordered or not equal
// Canonicalize these to be 'fcmp uno %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_UNO);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(OpType));
return &I;
case FCmpInst::FCMP_ORD: // True if ordered (no nans)
@@ -5429,7 +5618,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
// Canonicalize these to be 'fcmp ord %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_ORD);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(OpType));
return &I;
}
}
@@ -5438,15 +5627,20 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
- I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
+ I.setOperand(0, ConstantFP::getNullValue(OpType));
return &I;
}
if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
- I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
+ I.setOperand(1, ConstantFP::getNullValue(OpType));
return &I;
}
}
+ // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
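+ // e.g. fcmp olt (fneg X), (fneg Y) --> fcmp ogt X, Y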
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
// Test if the FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
@@ -5465,7 +5659,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
// fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
- I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+ I.setOperand(1, ConstantFP::getNullValue(OpType));
return &I;
}
@@ -5505,12 +5699,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (Instruction *R = foldFabsWithFcmpZero(I))
return R;
- Value *X, *Y;
if (match(Op0, m_FNeg(m_Value(X)))) {
- // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
- if (match(Op1, m_FNeg(m_Value(Y))))
- return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
-
// fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
Constant *C;
if (match(Op1, m_Constant(C))) {
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2de41bd5bef5..434b0d591215 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,9 +1,8 @@
//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,13 +52,14 @@ namespace llvm {
class APInt;
class AssumptionCache;
-class CallSite;
+class BlockFrequencyInfo;
class DataLayout;
class DominatorTree;
class GEPOperator;
class GlobalVariable;
class LoopInfo;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class TargetLibraryInfo;
class User;
@@ -185,40 +185,6 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return false;
}
-/// Specific patterns of overflow check idioms that we match.
-enum OverflowCheckFlavor {
- OCF_UNSIGNED_ADD,
- OCF_SIGNED_ADD,
- OCF_UNSIGNED_SUB,
- OCF_SIGNED_SUB,
- OCF_UNSIGNED_MUL,
- OCF_SIGNED_MUL,
-
- OCF_INVALID
-};
-
-/// Returns the OverflowCheckFlavor corresponding to a overflow_with_op
-/// intrinsic.
-static inline OverflowCheckFlavor
-IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
- switch (ID) {
- default:
- return OCF_INVALID;
- case Intrinsic::uadd_with_overflow:
- return OCF_UNSIGNED_ADD;
- case Intrinsic::sadd_with_overflow:
- return OCF_SIGNED_ADD;
- case Intrinsic::usub_with_overflow:
- return OCF_UNSIGNED_SUB;
- case Intrinsic::ssub_with_overflow:
- return OCF_SIGNED_SUB;
- case Intrinsic::umul_with_overflow:
- return OCF_UNSIGNED_MUL;
- case Intrinsic::smul_with_overflow:
- return OCF_SIGNED_MUL;
- }
-}
-
/// Some binary operators require special handling to avoid poison and undefined
/// behavior. If a constant vector has undef elements, replace those undefs with
/// identity constants if possible because those are always safe to execute.
@@ -306,6 +272,8 @@ private:
const DataLayout &DL;
const SimplifyQuery SQ;
OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
@@ -317,11 +285,11 @@ public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, const DataLayout &DL,
- LoopInfo *LI)
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
- DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+ DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
/// Run the combiner over the entire worklist until it is empty.
///
@@ -345,6 +313,7 @@ public:
// I - Change was made, I is still valid, I may be dead though
// otherwise - Change was made, replace I with returned instruction
//
+ Instruction *visitFNeg(UnaryOperator &I);
Instruction *visitAdd(BinaryOperator &I);
Instruction *visitFAdd(BinaryOperator &I);
Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
@@ -394,6 +363,7 @@ public:
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+ Instruction *visitCallBrInst(CallBrInst &CBI);
Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
@@ -403,6 +373,7 @@ public:
Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitFenceInst(FenceInst &FI);
Instruction *visitSwitchInst(SwitchInst &SI);
@@ -464,16 +435,22 @@ private:
/// operation in OperationResult and result of the overflow check in
/// OverflowResult, and return true. If no simplification is possible,
/// returns false.
- bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ bool OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS,
Instruction &CtxI, Value *&OperationResult,
Constant *&OverflowResult);
- Instruction *visitCallSite(CallSite CS);
+ Instruction *visitCallBase(CallBase &Call);
Instruction *tryOptimizeCall(CallInst *CI);
- bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS,
- IntrinsicInst *Tramp);
-
+ bool transformConstExprCastCall(CallBase &Call);
+ Instruction *transformCallThroughTrampoline(CallBase &Call,
+ IntrinsicInst &Tramp);
+
+ Value *simplifyMaskedLoad(IntrinsicInst &II);
+ Instruction *simplifyMaskedStore(IntrinsicInst &II);
+ Instruction *simplifyMaskedGather(IntrinsicInst &II);
+ Instruction *simplifyMaskedScatter(IntrinsicInst &II);
+
/// Transform (zext icmp) to bitwise / integer operations in order to
/// eliminate it.
///
@@ -592,6 +569,8 @@ private:
Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
Value *getSelectCondition(Value *A, Value *B);
+ Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+
public:
/// Inserts an instruction \p New before instruction \p Old
///
@@ -647,6 +626,16 @@ public:
return InsertValueInst::Create(Struct, Result, 0);
}
+ /// Create and insert the idiom we use to indicate a block is unreachable
+ /// without having to rewrite the CFG from within InstCombine.
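+ /// The emitted idiom is simply: store i1 true, i1* undef.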
+ void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
+ auto &Ctx = InsertAt->getContext();
+ new StoreInst(ConstantInt::getTrue(Ctx),
+ UndefValue::get(Type::getInt1PtrTy(Ctx)),
+ InsertAt);
+ }
+
/// Combiner aware instruction erasure.
///
/// When dealing with an instruction that has side effects or produces a void
@@ -703,7 +692,7 @@ public:
}
OverflowResult computeOverflowForSignedMul(const Value *LHS,
- const Value *RHS,
+ const Value *RHS,
const Instruction *CxtI) const {
return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
}
@@ -731,6 +720,10 @@ public:
return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
}
+ OverflowResult computeOverflow(
+ Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS, Instruction *CxtI) const;
+
/// Maximum size of array considered when transforming.
uint64_t MaxArraySizeForCombine;
@@ -802,8 +795,7 @@ private:
Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DmaskIdx = -1,
- int TFCIdx = -1);
+ int DmaskIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);
@@ -868,8 +860,6 @@ private:
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
- Instruction *foldICmpBitCastConstant(ICmpInst &Cmp, BitCastInst *Bitcast,
- const APInt &C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
const APInt &C);
Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
@@ -904,7 +894,10 @@ private:
Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
BinaryOperator *BO,
const APInt &C);
- Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C);
+ Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+ const APInt &C);
+ Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+ const APInt &C);
// Helpers of visitSelectInst().
Instruction *foldSelectExtConst(SelectInst &Sel);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 76ab614090fa..054fb7da09a2 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1,9 +1,8 @@
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,29 +88,29 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (auto CS = CallSite(I)) {
+ if (auto *Call = dyn_cast<CallBase>(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
- if (CS.isCallee(&U))
+ if (Call->isCallee(&U))
continue;
- unsigned DataOpNo = CS.getDataOperandNo(&U);
- bool IsArgOperand = CS.isArgOperand(&U);
+ unsigned DataOpNo = Call->getDataOperandNo(&U);
+ bool IsArgOperand = Call->isArgOperand(&U);
// Inalloca arguments are clobbered by the call.
- if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
+ if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
return false;
// If this is a readonly/readnone call site, then we know it is just a
// load (but one that potentially returns the value itself), so we can
// ignore it if we know that the value isn't captured.
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
+ if (Call->onlyReadsMemory() &&
+ (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (IsArgOperand && CS.isByValArgument(DataOpNo))
+ if (IsArgOperand && Call->isByValArgument(DataOpNo))
continue;
}
@@ -213,8 +212,8 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
Value *NullIdx = Constant::getNullValue(IdxTy);
Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+ Instruction *GEP = GetElementPtrInst::CreateInBounds(
+ NewTy, New, Idx, New->getName() + ".sub");
IC.InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -299,7 +298,7 @@ void PointerReplacer::replace(Instruction *I) {
if (auto *LT = dyn_cast<LoadInst>(I)) {
auto *V = getReplacement(LT->getPointerOperand());
assert(V && "Operand not replaced");
- auto *NewI = new LoadInst(V);
+ auto *NewI = new LoadInst(I->getType(), V);
NewI->takeName(LT);
IC.InsertNewInstWith(NewI, *LT);
IC.replaceInstUsesWith(*LT, NewI);
@@ -466,7 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
- NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+ NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
@@ -631,7 +630,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// infinite loop).
if (!Ty->isIntegerTy() && Ty->isSized() &&
DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
- DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
+ DL.typeSizeEqualsStoreSize(Ty) &&
!DL.isNonIntegralPointerType(Ty) &&
!isMinMaxWithLoads(
peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
@@ -725,7 +724,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
Name + ".elt");
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
+ EltAlign, Name + ".unpack");
// Propagate AA metadata. It'll still be valid on the narrowed load.
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
@@ -775,8 +775,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
};
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
Name + ".elt");
- auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
- Name + ".unpack");
+ auto *L = IC.Builder.CreateAlignedLoad(
+ AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
L->setAAMetadata(AAMD);
@@ -1064,12 +1064,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
- LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
- SI->getOperand(1)->getName()+".val");
- LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
- SI->getOperand(2)->getName()+".val");
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
+ DL, SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
+ DL, SI)) {
+ LoadInst *V1 =
+ Builder.CreateLoad(LI.getType(), SI->getOperand(1),
+ SI->getOperand(1)->getName() + ".val");
+ LoadInst *V2 =
+ Builder.CreateLoad(LI.getType(), SI->getOperand(2),
+ SI->getOperand(2)->getName() + ".val");
assert(LI.isUnordered() && "implied by above");
V1->setAlignment(Align);
V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@@ -1436,6 +1440,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(Ptr))
+ return eraseInstFromFunction(SI);
+
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
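A minimal sketch of the new constant-memory store elimination (invented identifiers); AA must report the destination as constant memory:

    @g = constant i32 7

    define void @store_const() {
      store i32 7, i32* @g   ; can only re-store the value already there
      ret void
    }
    ; expected: the store is erased.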
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7e99f3e4e500..cc753ce05313 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1,9 +1,8 @@
//===- InstCombineMulDivRem.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -375,11 +374,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
// Sink negation: -X * Y --> -(X * Y)
- if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))))
+ // But don't transform constant expressions because there's an inverse fold.
+ if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op0))
return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I);
// Sink negation: Y * -X --> -(X * Y)
- if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))))
+ // But don't transform constant expressions because there's an inverse fold.
+ if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I);
// fabs(X) * fabs(X) -> X * X
@@ -431,6 +432,14 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ Value *Z;
+ if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
+ m_Value(Z)))) {
+ // Sink division: (X / Y) * Z --> (X * Z) / Y
+ Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+ return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+ }
+
// sqrt(X) * sqrt(Y) -> sqrt(X * Y)
// nnan disallows the possibility of returning a number if both operands are
// negative (in that case, we should return NaN).
@@ -442,6 +451,45 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return replaceInstUsesWith(I, Sqrt);
}
+ // Like the similar transform in instsimplify, this requires 'nsz' because
+ // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
+ if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 &&
+ Op0->hasNUses(2)) {
+ // Peek through fdiv to find squaring of square root:
+ // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
+ if (match(Op0, m_FDiv(m_Value(X),
+ m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(XX, Y, &I);
+ }
+ // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
+ if (match(Op0, m_FDiv(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y)),
+ m_Value(X)))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(Y, XX, &I);
+ }
+ }
+
+ // exp(X) * exp(Y) -> exp(X + Y)
+ // Match as long as at least one of the exp calls has only one use.
+ if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
+ return replaceInstUsesWith(I, Exp);
+ }
+
+ // exp2(X) * exp2(Y) -> exp2(X + Y)
+ // Match as long as at least one of the exp2 calls has only one use.
+ if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
+ return replaceInstUsesWith(I, Exp2);
+ }
+
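Illustrative IR for the exp combine (the exp2 case is analogous; invented identifiers). Reassociation-enabling fast-math flags are assumed:

    declare double @llvm.exp.f64(double)

    define double @exp_mul(double %x, double %y) {
      %ex = call fast double @llvm.exp.f64(double %x)
      %ey = call fast double @llvm.exp.f64(double %y)
      %m = fmul fast double %ex, %ey
      ret double %m
    }
    ; expected:
    ;   %s = fadd fast double %x, %y
    ;   %m = call fast double @llvm.exp.f64(double %s)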
// (X*Y) * X => (X*X) * Y where Y != X
// The purpose is two-fold:
// 1) to form a power expression (of X).
@@ -576,7 +624,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
return false;
- APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
if (IsSigned)
APInt::sdivrem(C1, C2, Quotient, Remainder);
else
@@ -613,7 +661,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (X / C1) / C2 -> X / (C1*C2)
if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
- APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, Product));
@@ -621,7 +669,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
- APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
// (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
@@ -645,7 +693,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
*C1 != C1->getBitWidth() - 1) ||
(!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
- APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
APInt C1Shifted = APInt::getOneBitSet(
C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
@@ -977,6 +1025,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return BinaryOperator::CreateNeg(Op0);
+ // X / INT_MIN --> X == INT_MIN
+ if (match(Op1, m_SignMask()))
+ return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
+
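As a quick sanity check of the new fold (not LLVM code; assumes the usual two's-complement int with truncating division, matching the zext(icmp) form created above):

#include <cassert>
#include <climits>

int main() {
  const int Candidates[] = {INT_MIN, INT_MIN + 1, -7, -1, 0, 1, 42, INT_MAX};
  for (int X : Candidates) {
    // Only X == INT_MIN has a magnitude large enough to give a nonzero
    // quotient, and then the quotient is exactly 1.
    int Div = X / INT_MIN;
    int Cmp = (X == INT_MIN) ? 1 : 0; // zext(icmp eq X, INT_MIN) in IR terms
    assert(Div == Cmp);
  }
  return 0;
}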
const APInt *Op1C;
if (match(Op1, m_APInt(Op1C))) {
// sdiv exact X, C --> ashr exact X, log2(C)
@@ -1001,22 +1053,25 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
return new SExtInst(NarrowOp, Op0->getType());
}
- }
- if (Constant *RHS = dyn_cast<Constant>(Op1)) {
- // X/INT_MIN -> X == INT_MIN
- if (RHS->isMinSignedValue())
- return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
-
- // -X/C --> X/-C provided the negation doesn't overflow.
- Value *X;
- if (match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
- auto *BO = BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(RHS));
+ // -X / C --> X / -C (if the negation doesn't overflow).
+ // TODO: This could be enhanced to handle arbitrary vector constants by
+ // checking if all elements are not the min-signed-val.
+ if (!Op1C->isMinSignedValue() &&
+ match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
+ Constant *NegC = ConstantInt::get(I.getType(), -(*Op1C));
+ Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
BO->setIsExact(I.isExact());
return BO;
}
}
+ // -X / Y --> -(X / Y)
+ Value *Y;
+ if (match(&I, m_SDiv(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNSWNeg(
+ Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
+
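Both negation folds follow from how truncating division interacts with negation; a standalone C++ sketch (illustrative only; the inputs avoid INT_MIN because the 'nsw' requirement on the subtraction exists precisely to rule out the overflowing negation):

#include <cassert>

int main() {
  const int Xs[] = {-20, -1, 0, 1, 7, 1000};
  const int Ys[] = {-6, -1, 1, 3, 250};
  const int C = 4;
  for (int X : Xs) {
    assert(-X / C == X / -C);     // -X / C --> X / -C (C != INT_MIN)
    for (int Y : Ys)
      assert(-X / Y == -(X / Y)); // -X / Y --> -(X / Y)
  }
  return 0;
}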
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
@@ -1161,7 +1216,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
IRBuilder<> B(&I);
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
- AttributeList Attrs = CallSite(Op0).getCalledFunction()->getAttributes();
+ AttributeList Attrs =
+ cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
LibFunc_tanl, B, Attrs);
if (IsCot)
@@ -1305,6 +1361,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
}
+ // -X srem Y --> -(X srem Y)
+ Value *X, *Y;
+ if (match(&I, m_SRem(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
+
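The srem fold uses the same reasoning: with truncating division the remainder takes the sign of the dividend, so negating the dividend negates the remainder. A standalone illustrative check (again skipping INT_MIN, which is exactly what 'nsw' excludes):

#include <cassert>

int main() {
  for (int X : {-9, -1, 0, 2, 7, 123})
    for (int Y : {-5, -2, 1, 3, 10})
      assert(-X % Y == -(X % Y)); // -X srem Y --> -(X srem Y)
  return 0;
}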
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 7603cf4d7958..5820ab726637 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1,9 +1,8 @@
//===- InstCombinePHI.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -596,7 +595,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
- LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+ LoadInst *NewLI =
+ new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
@@ -1004,6 +1004,11 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
!isa<ConstantInt>(UserI->getOperand(1)))
return nullptr;
+ // Bail on out of range shifts.
+ unsigned SizeInBits = UserI->getType()->getScalarSizeInBits();
+ if (cast<ConstantInt>(UserI->getOperand(1))->getValue().uge(SizeInBits))
+ return nullptr;
+
unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index faf58a08976d..aefaf5af1750 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1,9 +1,8 @@
//===- InstCombineSelect.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -293,6 +292,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
return nullptr;
// If this is a cast from the same type, merge.
+ Value *Cond = SI.getCondition();
+ Type *CondTy = Cond->getType();
if (TI->getNumOperands() == 1 && TI->isCast()) {
Type *FIOpndTy = FI->getOperand(0)->getType();
if (TI->getOperand(0)->getType() != FIOpndTy)
@@ -300,7 +301,6 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
- Type *CondTy = SI.getCondition()->getType();
if (CondTy->isVectorTy()) {
if (!FIOpndTy->isVectorTy())
return nullptr;
@@ -327,12 +327,24 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// Fold this by inserting a select from the input values.
Value *NewSI =
- Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName() + ".v", &SI);
+ Builder.CreateSelect(Cond, TI->getOperand(0), FI->getOperand(0),
+ SI.getName() + ".v", &SI);
return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
+ // Cond ? -X : -Y --> -(Cond ? X : Y)
+ Value *X, *Y;
+ if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) &&
+ (TI->hasOneUse() || FI->hasOneUse())) {
+ Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);
+ // TODO: Remove the hack for the binop form when the unary op is optimized
+ // properly with all IR passes.
+ if (TI->getOpcode() != Instruction::FNeg)
+ return BinaryOperator::CreateFNegFMF(NewSel, cast<BinaryOperator>(TI));
+ return UnaryOperator::CreateFNeg(NewSel);
+ }
+
// Only handle binary operators (including two-operand getelementptr) with
// one-use here. As with the cast case above, it may be possible to relax the
// one-use constraint, but that needs be examined carefully since it may not
@@ -374,13 +386,12 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// If the select condition is a vector, the operands of the original select's
// operands also must be vectors. This may not be the case for getelementptr
// for example.
- if (SI.getCondition()->getType()->isVectorTy() &&
- (!OtherOpT->getType()->isVectorTy() ||
- !OtherOpF->getType()->isVectorTy()))
+ if (CondTy->isVectorTy() && (!OtherOpT->getType()->isVectorTy() ||
+ !OtherOpF->getType()->isVectorTy()))
return nullptr;
// If we reach here, they do have operations in common.
- Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+ Value *NewSI = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,
SI.getName() + ".v", &SI);
Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
@@ -521,6 +532,46 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
}
/// We want to turn:
+/// (select (icmp sgt X, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
+/// (select (icmp slt X, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
+/// into:
+/// ashr (X, Y)
+static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+ if (!CmpRHS->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ Value *X, *Y;
+ unsigned Bitwidth = CmpRHS->getType()->getScalarSizeInBits();
+ if ((Pred != ICmpInst::ICMP_SGT ||
+ !match(CmpRHS,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, -1)))) &&
+ (Pred != ICmpInst::ICMP_SLT ||
+ !match(CmpRHS,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, 0)))))
+ return nullptr;
+
+ // Canonicalize so that ashr is in FalseVal.
+ if (Pred == ICmpInst::ICMP_SLT)
+ std::swap(TrueVal, FalseVal);
+
+ if (match(TrueVal, m_LShr(m_Value(X), m_Value(Y))) &&
+ match(FalseVal, m_AShr(m_Specific(X), m_Specific(Y))) &&
+ match(CmpLHS, m_Specific(X))) {
+ const auto *Ashr = cast<Instruction>(FalseVal);
+ // If lshr is not exact and ashr is, this new ashr must not be exact.
+ bool IsExact = Ashr->isExact() && cast<Instruction>(TrueVal)->isExact();
+ return Builder.CreateAShr(X, Y, IC->getName(), IsExact);
+ }
+
+ return nullptr;
+}
+
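The reason the select can always collapse to the ashr: whenever the lshr arm is chosen, the compare guarantees X is non-negative, and for non-negative values logical and arithmetic right shift agree. A standalone 32-bit sketch (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const std::int32_t Xs[] = {0, 1, 5, 4096, 0x7fffffff};
  for (std::int32_t X : Xs)
    for (unsigned Y = 0; Y < 32; ++Y) {
      std::uint32_t LShr = static_cast<std::uint32_t>(X) >> Y; // logical shift
      std::int32_t AShr = X >> Y; // arithmetic shift; same bits for X >= 0
      assert(static_cast<std::uint32_t>(AShr) == LShr);
    }
  return 0;
}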
+/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
/// (or (shl (and X, C1), C3), Y)
@@ -623,11 +674,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
return Builder.CreateOr(V, Y);
}
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
/// TODO: Also support a - UMIN(a,b) patterns.
static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -669,11 +716,73 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
if (!TrueVal->hasOneUse())
return nullptr;
- // All checks passed, convert to canonical unsigned saturated subtraction
- // form: sub(max()).
- // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
- Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
- return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+ // (a > b) ? a - b : 0 -> usub.sat(a, b)
+ // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+ Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+ if (IsNegative)
+ Result = Builder.CreateNeg(Result);
+ return Result;
+}
+
+static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ if (!Cmp->hasOneUse())
+ return nullptr;
+
+ // Match unsigned saturated add with constant.
+ Value *Cmp0 = Cmp->getOperand(0);
+ Value *Cmp1 = Cmp->getOperand(1);
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ Value *X;
+ const APInt *C, *CmpC;
+ if (Pred == ICmpInst::ICMP_ULT &&
+ match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
+ match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
+ // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+ return Builder.CreateBinaryIntrinsic(
+ Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
+ }
+
+ // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ // There are 8 commuted variants.
+ // Canonicalize -1 (saturated result) to true value of the select. Just
+ // swapping the compare operands is legal, because the selected value is the
+ // same in case of equality, so we can interchange u< and u<=.
+ if (match(FVal, m_AllOnes())) {
+ std::swap(TVal, FVal);
+ std::swap(Cmp0, Cmp1);
+ }
+ if (!match(TVal, m_AllOnes()))
+ return nullptr;
+
+ // Canonicalize predicate to 'ULT'.
+ if (Pred == ICmpInst::ICMP_UGT) {
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(Cmp0, Cmp1);
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return nullptr;
+
+ // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ Value *Y;
+ if (match(Cmp0, m_Not(m_Value(X))) &&
+ match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
+ // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+ // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
+ }
+ // The 'not' op may be included in the sum but not the compare.
+ X = Cmp0;
+ Y = Cmp1;
+ if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
+ // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+ // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+ BinaryOperator *BO = cast<BinaryOperator>(FVal);
+ return Builder.CreateBinaryIntrinsic(
+ Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
+ }
+
+ return nullptr;
}
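The constant form matched above can be verified exhaustively at a narrow width. The helper uadd_sat below is only a hypothetical reference model of llvm.uadd.sat for 8-bit values, not LLVM code:

#include <cassert>
#include <cstdint>

// Reference model of 8-bit unsigned saturating addition.
static std::uint8_t uadd_sat(std::uint8_t A, std::uint8_t B) {
  unsigned Sum = unsigned(A) + unsigned(B);
  return Sum > 0xFF ? std::uint8_t(0xFF) : std::uint8_t(Sum);
}

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned C = 0; C <= 0xFF; ++C) {
      // (X u< ~C) ? (X + C) : -1  -->  uadd.sat(X, C)
      std::uint8_t NotC = std::uint8_t(~C);
      std::uint8_t Sel =
          std::uint8_t(X) < NotC ? std::uint8_t(X + C) : std::uint8_t(0xFF);
      assert(Sel == uadd_sat(std::uint8_t(X), std::uint8_t(C)));
    }
  return 0;
}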
/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
@@ -1043,12 +1152,18 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -1496,6 +1611,43 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) {
return nullptr;
}
+static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
+ Value *Y,
+ InstCombiner::BuilderTy &Builder) {
+ assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern");
+ bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN ||
+ SPF == SelectPatternFlavor::SPF_UMAX;
+ // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change
+ // the constant value check to an assert.
+ Value *A;
+ const APInt *C1, *C2;
+ if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) &&
+ match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) {
+ // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1
+ // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1
+ Value *NewMinMax = createMinMax(Builder, SPF, A,
+ ConstantInt::get(X->getType(), *C2 - *C1));
+ return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax,
+ ConstantInt::get(X->getType(), *C1));
+ }
+
+ if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) &&
+ match(Y, m_APInt(C2)) && X->hasNUses(2)) {
+ bool Overflow;
+ APInt Diff = C2->ssub_ov(*C1, Overflow);
+ if (!Overflow) {
+ // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1
+ // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1
+ Value *NewMinMax = createMinMax(Builder, SPF, A,
+ ConstantInt::get(X->getType(), Diff));
+ return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax,
+ ConstantInt::get(X->getType(), *C1));
+ }
+ }
+
+ return nullptr;
+}
+
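The unsigned half of this reassociation is easy to verify exhaustively at 8 bits; the sketch below mirrors the nuw and C2 >= C1 requirements checked in the code (constants are arbitrary, not taken from any test):

#include <algorithm>
#include <cassert>

int main() {
  const unsigned C1 = 10, C2 = 100;           // requires C2 >= C1
  for (unsigned A = 0; A + C1 <= 255; ++A) {  // 'nuw': A + C1 stays within 8 bits
    unsigned Lhs = std::min(A + C1, C2);      // umin (add nuw A, C1), C2
    unsigned Rhs = std::min(A, C2 - C1) + C1; // add nuw (umin A, C2 - C1), C1
    assert(Lhs == Rhs);
  }
  return 0;
}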
/// Reduce a sequence of min/max with a common operand.
static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
Value *RHS,
@@ -1757,37 +1909,55 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// NOTE: if we wanted to, this is where to detect MIN/MAX
}
+ }
- // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
- // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
- // also require nnan because we do not want to unintentionally change the
- // sign of a NaN value.
- Value *X = FCI->getOperand(0);
- FCmpInst::Predicate Pred = FCI->getPredicate();
- if (match(FCI->getOperand(1), m_AnyZeroFP()) && FCI->hasNoNaNs()) {
- // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
- // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
- if ((X == FalseVal && Pred == FCmpInst::FCMP_OLE &&
- match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) ||
- (X == TrueVal && Pred == FCmpInst::FCMP_OGT &&
- match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(X))))) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
- return replaceInstUsesWith(SI, Fabs);
- }
- // With nsz:
- // (X < +/-0.0) ? -X : X --> fabs(X)
- // (X <= +/-0.0) ? -X : X --> fabs(X)
- // (X > +/-0.0) ? X : -X --> fabs(X)
- // (X >= +/-0.0) ? X : -X --> fabs(X)
- if (FCI->hasNoSignedZeros() &&
- ((X == FalseVal && match(TrueVal, m_FNeg(m_Specific(X))) &&
- (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE)) ||
- (X == TrueVal && match(FalseVal, m_FNeg(m_Specific(X))) &&
- (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE)))) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
- return replaceInstUsesWith(SI, Fabs);
- }
- }
+ // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
+ // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
+ // also require nnan because we do not want to unintentionally change the
+ // sign of a NaN value.
+ // FIXME: These folds should test/propagate FMF from the select, not the
+ // fsub or fneg.
+ // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
+ Instruction *FSub;
+ if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+ match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
+ match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+ (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FSub);
+ return replaceInstUsesWith(SI, Fabs);
+ }
+ // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
+ if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+ match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
+ match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+ (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FSub);
+ return replaceInstUsesWith(SI, Fabs);
+ }
+ // With nnan and nsz:
+ // (X < +/-0.0) ? -X : X --> fabs(X)
+ // (X <= +/-0.0) ? -X : X --> fabs(X)
+ Instruction *FNeg;
+ if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+ match(TrueVal, m_FNeg(m_Specific(FalseVal))) &&
+ match(TrueVal, m_Instruction(FNeg)) &&
+ FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+ (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
+ Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FNeg);
+ return replaceInstUsesWith(SI, Fabs);
+ }
+ // With nnan and nsz:
+ // (X > +/-0.0) ? X : -X --> fabs(X)
+ // (X >= +/-0.0) ? X : -X --> fabs(X)
+ if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+ match(FalseVal, m_FNeg(m_Specific(TrueVal))) &&
+ match(FalseVal, m_Instruction(FNeg)) &&
+ FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+ (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
+ Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FNeg);
+ return replaceInstUsesWith(SI, Fabs);
}
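A standalone illustration of the select-to-fabs identity (not LLVM code). The X == 0.0 case also shows why 'nsz' is needed: the select produces -0.0 while fabs produces +0.0, which compare equal but differ in sign:

#include <cassert>
#include <cmath>

int main() {
  for (double X : {-3.5, -1.0, 0.0, 2.25, 7.0}) {
    double Sel = (X > 0.0) ? X : -X; // (X > +/-0.0) ? X : -X
    assert(Sel == std::fabs(X));     // equal numerically even for X == 0.0,
                                     // though the sign of zero may differ
  }
  return 0;
}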
// See if we are selecting two values based on a comparison of the two values.
@@ -1895,11 +2065,27 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
return I;
+ if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder))
+ return I;
+
if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
return I;
}
}
+ // Canonicalize select of FP values where NaN and -0.0 are not valid as
+ // minnum/maxnum intrinsics.
+ if (isa<FPMathOperator>(SI) && SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
+ Value *X, *Y;
+ if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
+ return replaceInstUsesWith(
+ SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI));
+
+ if (match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))
+ return replaceInstUsesWith(
+ SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI));
+ }
+
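For reference, once NaNs are excluded the ordered-compare select behaves like libm's fmax/fmin, which is the behavior llvm.maxnum/llvm.minnum are modeled on. A small illustrative check in plain C++:

#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {-2.0, -0.5, 0.5, 3.0};
  for (double X : Vals)
    for (double Y : Vals) {
      assert(((X > Y) ? X : Y) == std::fmax(X, Y)); // the m_OrdFMax shape
      assert(((X < Y) ? X : Y) == std::fmin(X, Y)); // the m_OrdFMin shape
    }
  return 0;
}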
// See if we can fold the select into a phi node if the condition is a select.
if (auto *PN = dyn_cast<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c562d45a9e2b..c821292400cd 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1,9 +1,8 @@
//===- InstCombineShifts.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,51 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+// Given pattern:
+// (x shiftopcode Q) shiftopcode K
+// we should rewrite it as
+// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x)
+// This is valid for any shift, but they must be identical.
+static Instruction *
+reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
+ const SimplifyQuery &SQ) {
+ // Look for: (x shiftopcode ShAmt1) shiftopcode ShAmt0
+ Value *X, *ShAmt1, *ShAmt0;
+ Instruction *Sh1;
+ if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
+ m_Instruction(Sh1)),
+ m_Value(ShAmt0))))
+ return nullptr;
+
+ // The shift opcodes must be identical.
+ Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
+ if (ShiftOpcode != Sh1->getOpcode())
+ return nullptr;
+ // Can we fold (ShAmt0+ShAmt1) ?
+ Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
+ SQ.getWithInstruction(Sh0));
+ if (!NewShAmt)
+ return nullptr; // Did not simplify.
+ // Is the new shift amount smaller than the bit width?
+ // FIXME: could also rely on ConstantRange.
+ unsigned BitWidth = X->getType()->getScalarSizeInBits();
+ if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(BitWidth, BitWidth))))
+ return nullptr;
+ // All good, we can do this fold.
+ BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
+ // If both of the original shifts had the same flag set, preserve the flag.
+ if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+ NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+ Sh1->hasNoUnsignedWrap());
+ NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+ Sh1->hasNoSignedWrap());
+ } else {
+ NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+ }
+ return NewShift;
+}
+
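The reassociation itself is the familiar identity below; this standalone check only covers the in-range case, matching the u< bitwidth guard above (values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  const std::uint32_t X = 0xDEADBEEFu;
  for (unsigned Q = 0; Q < 32; ++Q)
    for (unsigned K = 0; Q + K < 32; ++K)
      assert(((X >> Q) >> K) == (X >> (Q + K))); // same-direction shifts only
  return 0;
}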
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
@@ -39,6 +83,10 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
+ if (Instruction *NewShift =
+ reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+ return NewShift;
+
// (C1 shift (A add C2)) -> ((C1 shift C2) shift A)
// iff A and C2 are both positive.
Value *A;
@@ -313,35 +361,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// If this is a bitwise operator or add with a constant RHS we might be able
// to pull it through a shift.
static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
- BinaryOperator *BO,
- const APInt &C) {
- bool IsValid = true; // Valid only for And, Or Xor,
- bool HighBitSet = false; // Transform ifhigh bit of constant set?
-
+ BinaryOperator *BO) {
switch (BO->getOpcode()) {
- default: IsValid = false; break; // Do not perform transform!
+ default:
+ return false; // Do not perform transform!
case Instruction::Add:
- IsValid = Shift.getOpcode() == Instruction::Shl;
- break;
+ return Shift.getOpcode() == Instruction::Shl;
case Instruction::Or:
case Instruction::Xor:
- HighBitSet = false;
- break;
case Instruction::And:
- HighBitSet = true;
- break;
+ return true;
}
-
- // If this is a signed shift right, and the high bit is modified
- // by the logical operation, do not perform the transformation.
- // The HighBitSet boolean indicates the value of the high bit of
- // the constant which would cause it to be modified for this
- // operation.
- //
- if (IsValid && Shift.getOpcode() == Instruction::AShr)
- IsValid = C.isNegative() == HighBitSet;
-
- return IsValid;
}
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
@@ -508,7 +538,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// shift is the only use, we can pull it out of the shift.
const APInt *Op0C;
if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
- if (canShiftBinOpWithConstantRHS(I, Op0BO, *Op0C)) {
+ if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(Op0BO->getOperand(1)), Op1);
@@ -552,7 +582,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
const APInt *C;
if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
match(TBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, TBO, *C)) {
+ canShiftBinOpWithConstantRHS(I, TBO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(TBO->getOperand(1)), Op1);
@@ -571,7 +601,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
const APInt *C;
if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
match(FBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, FBO, *C)) {
+ canShiftBinOpWithConstantRHS(I, FBO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(FBO->getOperand(1)), Op1);
@@ -601,6 +631,8 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+
const APInt *ShAmtAPInt;
if (match(Op1, m_APInt(ShAmtAPInt))) {
unsigned ShAmt = ShAmtAPInt->getZExtValue();
@@ -689,6 +721,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
}
+ // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
+ if (match(Op0, m_One()) &&
+ match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X))))
+ return BinaryOperator::CreateLShr(
+ ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
+
return nullptr;
}
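The new shl fold rewrites a shift of 1 as a right shift of the sign-bit mask; a standalone check of the identity at a 32-bit width (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 32;
  const std::uint32_t SignMask = std::uint32_t(1) << (BitWidth - 1);
  // 1 << (C - x) == (1 << C) >> x, with C = BitWidth - 1.
  for (unsigned x = 0; x < BitWidth; ++x)
    assert((std::uint32_t(1) << (BitWidth - 1 - x)) == (SignMask >> x));
  return 0;
}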
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9bf87d024607..e0d85c4b49ae 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1,9 +1,8 @@
//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -366,10 +365,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBits InputKnown(SrcBitWidth);
if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
return I;
- Known = InputKnown.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- Known.Zero.setBitsFrom(SrcBitWidth);
+ assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+ Known = InputKnown.zextOrTrunc(BitWidth,
+ true /* ExtendedBitsAreKnownZero */);
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
@@ -967,26 +965,16 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; TFE/LWE calls
+/// have struct returns and are not handled here.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx,
- int TFCIdx) {
+ int DMaskIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
- // Need to change to new instruction format
- ConstantInt *TFC = nullptr;
- bool TFELWEEnabled = false;
- if (TFCIdx > 0) {
- TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
- TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
- || TFC->getZExtValue() & 0x2; // LWE
- }
-
- if (TFELWEEnabled)
- return nullptr; // TFE not yet supported
-
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -994,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
// below.
DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
} else {
- ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
- if (!DMask)
- return nullptr; // non-constant dmask is not supported by codegen
-
+ ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
// Mask off values that are undefined because the dmask doesn't cover them
@@ -1018,8 +1003,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
- // TODO: Handle 3 vectors when supported in code gen.
- unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+ unsigned NewNumElts = DemandedElts.countPopulation();
if (!NewNumElts)
return UndefValue::get(II->getType());
@@ -1035,13 +1019,12 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
getIntrinsicInfoTableEntries(IID, Table);
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+ // Validate function argument and return types, extracting overloaded types
+ // along the way.
FunctionType *FTy = II->getCalledFunction()->getFunctionType();
SmallVector<Type *, 6> OverloadTys;
- Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+ Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
- // Get the new return type overload of the intrinsic.
Module *M = II->getParent()->getParent()->getParent();
Type *EltTy = II->getType()->getVectorElementType();
Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
@@ -1184,6 +1167,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (I->getOpcode()) {
default: break;
+ case Instruction::GetElementPtr: {
+ // The LangRef requires that struct geps have all constant indices. As
+ // such, we can't convert any operand to partial undef.
+ auto mayIndexStructType = [](GetElementPtrInst &GEP) {
+ for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
+ I != E; I++)
+ if (I.isStruct())
+ return true;
+ return false;
+ };
+ if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
+ break;
+
+ // Conservatively track the demanded elements back through any vector
+ // operands we may have. We know there must be at least one, or we
+ // wouldn't have a vector result to get here. Note that we intentionally
+ // merge the undef bits here since gepping with either an undef base or
+ // index results in undef.
+ for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ if (isa<UndefValue>(I->getOperand(i))) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return nullptr;
+ }
+ if (I->getOperand(i)->getType()->isVectorTy()) {
+ APInt UndefEltsOp(VWidth, 0);
+ simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+ UndefElts |= UndefEltsOp;
+ }
+ }
+
+ break;
+ }
case Instruction::InsertElement: {
// If this is a variable index, we don't know which element it overwrites.
// demand exactly the same input as we produce.
@@ -1430,6 +1446,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;
switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_gather: // fallthrough
+ case Intrinsic::masked_load: {
+ // Subtlety: If we load from a pointer, the pointer must be valid
+ // regardless of whether the element is demanded. Doing otherwise risks
+ // segfaults which didn't exist in the original program.
+ APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+ DemandedPassThrough(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+ for (unsigned i = 0; i < VWidth; i++) {
+ Constant *CElt = CV->getAggregateElement(i);
+ if (CElt->isNullValue())
+ DemandedPtrs.clearBit(i);
+ else if (CElt->isAllOnesValue())
+ DemandedPassThrough.clearBit(i);
+ }
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+ simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+
+ // Output elements are undefined if the corresponding elements from both sources are.
+ // TODO: can strengthen via mask as well.
+ UndefElts = UndefElts2 & UndefElts3;
+ break;
+ }
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
// The instructions for these intrinsics are speced to zero upper bits not
@@ -1639,8 +1679,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(
- II, DemandedElts, 0, II->getNumArgOperands() - 2);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
break;
}
@@ -1667,5 +1706,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
}
+ // If we've proven all of the lanes undef, return an undef value.
+ // TODO: Intersect w/demanded lanes
+ if (UndefElts.isAllOnesValue())
+ return UndefValue::get(I->getType());
+
return MadeChange ? I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 0ad1fc0e791f..dc9abdd7f47a 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1,9 +1,8 @@
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -663,18 +662,17 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
return true;
}
-// Turn a chain of inserts that splats a value into a canonical insert + shuffle
-// splat. That is:
-// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
-// shufflevector(insertelt(X, %k, 0), undef, zero)
-static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
- // We are interested in the last insert in a chain. So, if this insert
- // has a single user, and that user is an insert, bail.
+/// Turn a chain of inserts that splats a value into an insert + shuffle:
+/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
+/// shufflevector(insertelt(X, %k, 0), undef, zero)
+static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
+ // We are interested in the last insert in a chain. So if this insert has a
+ // single user and that user is an insert, bail.
if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
return nullptr;
- VectorType *VT = cast<VectorType>(InsElt.getType());
- int NumElements = VT->getNumElements();
+ auto *VecTy = cast<VectorType>(InsElt.getType());
+ unsigned NumElements = VecTy->getNumElements();
// Do not try to do this for a one-element vector, since that's a nop,
// and will cause an inf-loop.
@@ -706,24 +704,66 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
CurrIE = NextIE;
}
- // Make sure we've seen an insert into every element.
- if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+ // If this is just a single insertelement (not a sequence), we are done.
+ if (FirstIE == &InsElt)
return nullptr;
- // All right, create the insert + shuffle.
- Instruction *InsertFirst;
- if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
- InsertFirst = FirstIE;
- else
- InsertFirst = InsertElementInst::Create(
- UndefValue::get(VT), SplatVal,
- ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
- "", &InsElt);
+ // If we are not inserting into an undef vector, make sure we've seen an
+ // insert into every element.
+ // TODO: If the base vector is not undef, it might be better to create a splat
+ // and then a select-shuffle (blend) with the base vector.
+ if (!isa<UndefValue>(FirstIE->getOperand(0)))
+ if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+ return nullptr;
+
+ // Create the insert + shuffle.
+ Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
+ UndefValue *UndefVec = UndefValue::get(VecTy);
+ Constant *Zero = ConstantInt::get(Int32Ty, 0);
+ if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
+ FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
- Constant *ZeroMask = ConstantAggregateZero::get(
- VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
+ // Splat from element 0, but replace absent elements with undef in the mask.
+ SmallVector<Constant *, 16> Mask(NumElements, Zero);
+ for (unsigned i = 0; i != NumElements; ++i)
+ if (!ElementPresent[i])
+ Mask[i] = UndefValue::get(Int32Ty);
- return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
+ return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
+}
+
+/// Try to fold an insert element into an existing splat shuffle by changing
+/// the shuffle's mask to include the index of this insert element.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+ // Check if the vector operand of this insert is a canonical splat shuffle.
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+ if (!Shuf || !Shuf->isZeroEltSplat())
+ return nullptr;
+
+ // Check for a constant insertion index.
+ uint64_t IdxC;
+ if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+ return nullptr;
+
+ // Check if the splat shuffle's input is the same as this insert's scalar op.
+ Value *X = InsElt.getOperand(1);
+ Value *Op0 = Shuf->getOperand(0);
+ if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
+ return nullptr;
+
+ // Replace the shuffle mask element at the index of this insert with a zero.
+ // For example:
+ // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+ Constant *Zero = ConstantInt::getNullValue(I32Ty);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
+
+ Constant *NewMask = ConstantVector::get(NewMaskVec);
+ return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
}
/// If we have an insertelement instruction feeding into another insertelement
@@ -864,30 +904,28 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
return replaceInstUsesWith(IE, V);
- // Inserting an undef or into an undefined place, remove this.
- if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
- replaceInstUsesWith(IE, VecOp);
+ // If the vector and scalar are both bitcast from the same element type, do
+ // the insert in that source type followed by bitcast.
+ Value *VecSrc, *ScalarSrc;
+ if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
+ match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
+ (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
+ VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
+ VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
+ // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
+ // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
+ Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
+ return new BitCastInst(NewInsElt, IE.getType());
+ }
// If the inserted element was extracted from some other vector and both
- // indexes are constant, try to turn this into a shuffle.
+ // indexes are valid constants, try to turn this into a shuffle.
uint64_t InsertedIdx, ExtractedIdx;
Value *ExtVecOp;
if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
- m_ConstantInt(ExtractedIdx)))) {
- unsigned NumInsertVectorElts = IE.getType()->getNumElements();
- unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
- if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
- return replaceInstUsesWith(IE, VecOp);
-
- if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
- return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
- // If we are extracting a value from a vector, then inserting it right
- // back into the same place, just use the input vector.
- if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
- return replaceInstUsesWith(IE, VecOp);
-
+ m_ConstantInt(ExtractedIdx))) &&
+ ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
// TODO: Looking at the user(s) to determine if this insert is a
// fold-to-shuffle opportunity does not match the usual instcombine
// constraints. We should decide if the transform is worthy based only
@@ -943,11 +981,12 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
return NewInsElt;
- // Turn a sequence of inserts that broadcasts a scalar into a single
- // insert + shufflevector.
- if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
+ if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
return Broadcast;
+ if (Instruction *Splat = foldInsEltIntoSplat(IE))
+ return Splat;
+
return nullptr;
}
@@ -1172,7 +1211,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ Value *V;
+ // Recursively call evaluateInDifferentElementOrder on vector arguments
+ // as well. E.g. GetElementPtr may have scalar operands even if the
+ // return value is a vector, so we need to examine the operand type.
+ if (I->getOperand(i)->getType()->isVectorTy())
+ V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ else
+ V = I->getOperand(i);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
@@ -1337,6 +1383,41 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
return NewBO;
}
+/// If we have an insert of a scalar to a non-zero element of an undefined
+/// vector and then shuffle that value, that's the same as inserting to the zero
+/// element and shuffling. Splatting from the zero element is recognized as the
+/// canonical form of splat.
+static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+ Constant *Mask = Shuf.getMask();
+ Value *X;
+ uint64_t IndexC;
+
+ // Match a shuffle that is a splat to a non-zero element.
+ if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
+ m_ConstantInt(IndexC)))) ||
+ !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
+ return nullptr;
+
+ // Insert into element 0 of an undef vector.
+ UndefValue *UndefVec = UndefValue::get(Shuf.getType());
+ Constant *Zero = Builder.getInt32(0);
+ Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
+
+ // Splat from element 0. Any mask element that is undefined remains undefined.
+ // For example:
+ // shuf (inselt undef, X, 2), undef, <2,2,undef>
+ // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+ unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ NewMask[i] = Mask->getAggregateElement(i);
+
+ return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
+}
+
/// Try to fold shuffles that are the equivalent of a vector select.
static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
InstCombiner::BuilderTy &Builder,
@@ -1344,6 +1425,15 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
if (!Shuf.isSelect())
return nullptr;
+ // Canonicalize to choose from operand 0 first.
+ unsigned NumElts = Shuf.getType()->getVectorNumElements();
+ if (Shuf.getMaskValue(0) >= (int)NumElts) {
+ // TODO: Can we assert that both operands of a shuffle-select are not undef
+ // (otherwise, it would have been folded by instsimplify)?
+ Shuf.commute();
+ return &Shuf;
+ }
+
if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
return I;
@@ -1499,6 +1589,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
return nullptr;
+ // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
+ // then combining may result in worse codegen.
+ if (!Op0->hasOneUse())
+ return nullptr;
+
// We are extracting a subvector from a shuffle. Remove excess elements from
// the 1st shuffle mask to eliminate the extract.
//
@@ -1588,6 +1683,72 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
return nullptr;
}
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+ // Match the operands as identity with padding (also known as concatenation
+ // with undef) shuffles of the same source type. The backend is expected to
+ // recreate these concatenations from a shuffle of narrow operands.
+ auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+ auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+ if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+ !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+ return nullptr;
+
+ // We limit this transform to power-of-2 types because we expect that the
+ // backend can convert the simplified IR patterns to identical nodes as the
+ // original IR.
+ // TODO: If we can verify the same behavior for arbitrary types, the
+ // power-of-2 checks can be removed.
+ Value *X = Shuffle0->getOperand(0);
+ Value *Y = Shuffle1->getOperand(0);
+ if (X->getType() != Y->getType() ||
+ !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
+ !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
+ !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
+ isa<UndefValue>(X) || isa<UndefValue>(Y))
+ return nullptr;
+ assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+ isa<UndefValue>(Shuffle1->getOperand(1)) &&
+ "Unexpected operand for identity shuffle");
+
+ // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+ // operands directly by adjusting the shuffle mask to account for the narrower
+ // types:
+ // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+ int NarrowElts = X->getType()->getVectorNumElements();
+ int WideElts = Shuffle0->getType()->getVectorNumElements();
+ assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+ SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+ SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == -1)
+ continue;
+
+ // If this shuffle is choosing an undef element from 1 of the sources, that
+ // element is undef.
+ if (Mask[i] < WideElts) {
+ if (Shuffle0->getMaskValue(Mask[i]) == -1)
+ continue;
+ } else {
+ if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
+ continue;
+ }
+
+ // If this shuffle is choosing from the 1st narrow op, the mask element is
+ // the same. If this shuffle is choosing from the 2nd narrow op, the mask
+ // element is offset down to adjust for the narrow vector widths.
+ if (Mask[i] < WideElts) {
+ assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
+ NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
+ } else {
+ assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
+ NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+ }
+ }
+ return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
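The only subtle part of this fold is the mask remapping for elements drawn from the second wide operand; a standalone sketch of one concrete case (plain C++, with hypothetical element names x0/x1/y0/y1 in the comments):

#include <cassert>
#include <vector>

int main() {
  const int NarrowElts = 2, WideElts = 4;
  // Wide operands: X = {x0, x1} padded to {x0, x1, u, u}, and likewise for Y.
  // A wide mask element of 5 selects y1 (element 1 of the second operand).
  const std::vector<int> WideMask = {0, 5};
  std::vector<int> NarrowMask;
  for (int M : WideMask)
    NarrowMask.push_back(M < WideElts ? M : M - (WideElts - NarrowElts));
  // Against the unpadded operands X and Y, <0, 3> still selects x0 and y1.
  const std::vector<int> Expected = {0, 3};
  assert(NarrowMask == Expected);
  return 0;
}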
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
@@ -1595,36 +1756,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
return replaceInstUsesWith(SVI, V);
- if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
- return I;
-
- if (Instruction *I = narrowVectorSelect(SVI, Builder))
- return I;
-
+ // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
+ // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
unsigned VWidth = SVI.getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
- if (V != &SVI)
- return replaceInstUsesWith(SVI, V);
- return &SVI;
- }
-
- if (Instruction *I = foldIdentityExtractShuffle(SVI))
- return I;
-
- // This transform has the potential to lose undef knowledge, so it is
- // intentionally placed after SimplifyDemandedVectorElts().
- if (Instruction *I = foldShuffleWithInsert(SVI))
- return I;
-
+ unsigned LHSWidth = LHS->getType()->getVectorNumElements();
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- unsigned LHSWidth = LHS->getType()->getVectorNumElements();
- bool MadeChange = false;
-
- // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
- // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
@@ -1646,11 +1783,36 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SVI.setOperand(0, SVI.getOperand(1));
SVI.setOperand(1, UndefValue::get(RHS->getType()));
SVI.setOperand(2, ConstantVector::get(Elts));
- LHS = SVI.getOperand(0);
- RHS = SVI.getOperand(1);
- MadeChange = true;
+ return &SVI;
}
+ if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
+ return I;
+
+ if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
+ return I;
+
+ if (Instruction *I = narrowVectorSelect(SVI, Builder))
+ return I;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return replaceInstUsesWith(SVI, V);
+ return &SVI;
+ }
+
+ if (Instruction *I = foldIdentityExtractShuffle(SVI))
+ return I;
+
+ // These transforms have the potential to lose undef knowledge, so they are
+ // intentionally placed after SimplifyDemandedVectorElts().
+ if (Instruction *I = foldShuffleWithInsert(SVI))
+ return I;
+ if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+ return I;
+
if (VWidth == LHSWidth) {
// Analyze the shuffle: are the LHS or RHS identity shuffles?
bool isLHSID, isRHSID;
@@ -1695,6 +1857,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// +-----------+-----------+-----------+-----------+
// Index range [6,10): ^-----------^ Needs an extra shuffle.
// Target type i40: ^--------------^ Won't work, bail.
+ bool MadeChange = false;
if (isShuffleExtractingFromLHS(SVI, Mask)) {
Value *V = LHS;
unsigned MaskElems = Mask.size();
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index be7d43bbcf2c..385f4926b845 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1,9 +1,8 @@
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,14 +46,17 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -221,6 +223,11 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
return !Overflow;
}
+static bool hasNoUnsignedWrap(BinaryOperator &I) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ return OBO && OBO->hasNoUnsignedWrap();
+}
+
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable as they can be
/// preserved.
@@ -327,14 +334,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- if (MaintainNoSignedWrap(I, B, C) &&
+ bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
+ bool IsNSW = MaintainNoSignedWrap(I, B, C);
+
+ ClearSubclassDataAfterReassociation(I);
+
+ if (IsNUW)
+ I.setHasNoUnsignedWrap(true);
+
+ if (IsNSW &&
(!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
// Note: this is only valid because SimplifyBinOp doesn't look at
// the operands to Op0.
- I.clearSubclassOptionalData();
I.setHasNoSignedWrap(true);
- } else {
- ClearSubclassDataAfterReassociation(I);
}
Changed = true;
@@ -419,8 +431,14 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
- BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
- if (isa<FPMathOperator>(NewBO)) {
+ bool IsNUW = hasNoUnsignedWrap(I) &&
+ hasNoUnsignedWrap(*Op0) &&
+ hasNoUnsignedWrap(*Op1);
+ BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
+ BinaryOperator::CreateNUW(Opcode, A, B) :
+ BinaryOperator::Create(Opcode, A, B);
+
+ if (isa<FPMathOperator>(NewBO)) {
FastMathFlags Flags = I.getFastMathFlags();
Flags &= Op0->getFastMathFlags();
Flags &= Op1->getFastMathFlags();
@@ -433,6 +451,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
+ if (IsNUW)
+ I.setHasNoUnsignedWrap(true);
Changed = true;
continue;
@@ -570,32 +590,44 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
++NumFactor;
SimplifiedInst->takeName(&I);
- // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
- // TODO: Check for NUW.
+ // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
bool HasNSW = false;
- if (isa<OverflowingBinaryOperator>(&I))
+ bool HasNUW = false;
+ if (isa<OverflowingBinaryOperator>(&I)) {
HasNSW = I.hasNoSignedWrap();
+ HasNUW = I.hasNoUnsignedWrap();
+ }
- if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
+ if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
HasNSW &= LOBO->hasNoSignedWrap();
+ HasNUW &= LOBO->hasNoUnsignedWrap();
+ }
- if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
+ if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
HasNSW &= ROBO->hasNoSignedWrap();
+ HasNUW &= ROBO->hasNoUnsignedWrap();
+ }
- // We can propagate 'nsw' if we know that
- // %Y = mul nsw i16 %X, C
- // %Z = add nsw i16 %Y, %X
- // =>
- // %Z = mul nsw i16 %X, C+1
- //
- // iff C+1 isn't INT_MIN
const APInt *CInt;
if (TopLevelOpcode == Instruction::Add &&
- InnerOpcode == Instruction::Mul)
- if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
- BO->setHasNoSignedWrap(HasNSW);
+ InnerOpcode == Instruction::Mul) {
+ // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ if (match(V, m_APInt(CInt))) {
+ if (!CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
+ }
+
+ // nuw can be propagated with any constant or nuw value.
+ BO->setHasNoUnsignedWrap(HasNUW);
+ }
}
}
}
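Reviewer sketch (not part of the patch): the only way the rewritten multiply can overflow when both original instructions were nsw is the C == INT_MAX case, where C+1 wraps to INT_MIN, which is exactly what the guard above excludes. An exhaustive i8 check of the add(mul(X, C), X) ==> mul(X, C+1) rewrite:

#include <climits>
#include <cstdint>
#include <cstdio>

int main() {
  int BadWithGuard = 0, CaughtByGuard = 0;
  for (int x = INT8_MIN; x <= INT8_MAX; ++x)
    for (int c = INT8_MIN; c <= INT8_MAX; ++c) {
      int8_t XC, Z, NewZ;
      int8_t NewC = (int8_t)(c + 1); // wraps to -128 when c == 127
      if (__builtin_mul_overflow((int8_t)x, (int8_t)c, &XC))
        continue; // the mul was not nsw
      if (__builtin_add_overflow(XC, (int8_t)x, &Z))
        continue; // the add was not nsw
      if (!__builtin_mul_overflow((int8_t)x, NewC, &NewZ))
        continue; // rewritten mul does not overflow; nsw is fine
      if (NewC == INT8_MIN)
        ++CaughtByGuard; // the one failure mode, excluded by the check above
      else
        ++BadWithGuard;  // would be a soundness bug; never happens
    }
  std::printf("failures despite guard: %d, failures caught by guard: %d\n",
              BadWithGuard, CaughtByGuard);
  return BadWithGuard == 0 ? 0 : 1;
}

There is no analogous corner case for 'nuw', which is consistent with the unconditional setHasNoUnsignedWrap(HasNUW) above.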
@@ -922,8 +954,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// If the InVal is an invoke at the end of the pred block, then we can't
// insert a computation after it without breaking the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == NonConstBB)
+ if (isa<InvokeInst>(InVal))
+ if (cast<Instruction>(InVal)->getParent() == NonConstBB)
return nullptr;
// If the incoming non-constant value is in I's block, we will remove one
@@ -1376,7 +1408,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
LHS->hasOneUse() && RHS->hasOneUse() &&
- cast<ShuffleVectorInst>(LHS)->isConcat()) {
+ cast<ShuffleVectorInst>(LHS)->isConcat() &&
+ cast<ShuffleVectorInst>(RHS)->isConcat()) {
// This transform does not have the speculative execution constraint as
// below because the shuffle is a concatenation. The new binops are
// operating on exactly the same elements as the existing binop.
@@ -1415,6 +1448,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
return createBinOpShuffle(V1, V2, Mask);
}
+ // If both arguments of a commutative binop are select-shuffles that use the
+ // same mask with commuted operands, the shuffles are unnecessary.
+ if (Inst.isCommutative() &&
+ match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+ match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+ m_Specific(Mask)))) {
+ auto *LShuf = cast<ShuffleVectorInst>(LHS);
+ auto *RShuf = cast<ShuffleVectorInst>(RHS);
+ // TODO: Allow shuffles that contain undefs in the mask?
+ // That is legal, but it reduces undef knowledge.
+ // TODO: Allow arbitrary shuffles by shuffling after binop?
+ // That might be legal, but we have to deal with poison.
+ if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+ RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+ // Example:
+ // LHS = shuffle V1, V2, <0, 5, 6, 3>
+ // RHS = shuffle V2, V1, <0, 5, 6, 3>
+ // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+ Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+ NewBO->copyIRFlags(&Inst);
+ return NewBO;
+ }
+ }
+
// If one argument is a shuffle within one vector and the other is a constant,
// try moving the shuffle after the binary operation. This canonicalization
// intends to move shuffles closer to other shuffles and binops closer to
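Reviewer sketch (not part of the patch): the commuted select-shuffle fold in the hunk above can be modelled on plain arrays. With mask <0, 5, 6, 3>, lane i of the first shuffle comes from V1 when Mask[i] < 4 and from V2 otherwise; the commuted shuffle picks the opposite source per lane, so adding the two lane-wise is just V1 + V2:

#include <array>
#include <cstdio>

using Vec4 = std::array<int, 4>;

// Emulates shufflevector on two 4-wide vectors with a constant mask.
static Vec4 shuffle(const Vec4 &A, const Vec4 &B, const std::array<int, 4> &Mask) {
  Vec4 R{};
  for (int i = 0; i < 4; ++i)
    R[i] = Mask[i] < 4 ? A[Mask[i]] : B[Mask[i] - 4];
  return R;
}

int main() {
  Vec4 V1 = {10, 11, 12, 13}, V2 = {20, 21, 22, 23};
  std::array<int, 4> Mask = {0, 5, 6, 3}; // select mask: lane i stays in lane i
  Vec4 LHS = shuffle(V1, V2, Mask);       // {V1[0], V2[1], V2[2], V1[3]}
  Vec4 RHS = shuffle(V2, V1, Mask);       // {V2[0], V1[1], V1[2], V2[3]}
  for (int i = 0; i < 4; ++i)
    std::printf("lane %d: %d + %d = %d, V1[i] + V2[i] = %d\n", i, LHS[i],
                RHS[i], LHS[i] + RHS[i], V1[i] + V2[i]);
  return 0;
}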
@@ -1557,6 +1614,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
+ // For vector geps, use the generic demanded vector support.
+ if (GEP.getType()->isVectorTy()) {
+ auto VWidth = GEP.getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
+ UndefElts)) {
+ if (V != &GEP)
+ return replaceInstUsesWith(GEP, V);
+ return &GEP;
+ }
+
+ // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
+ // possible (decide on canonical form for pointer broadcast), 3) exploit
+ // undef elements to decrease demanded bits
+ }
+
Value *PtrOp = GEP.getOperand(0);
// Eliminate unneeded casts for indices, and replace indices which displace
@@ -1755,9 +1829,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// put NewSrc at same location as %src
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
auto *NewSrc = cast<GetElementPtrInst>(
- Builder.CreateGEP(SO0, GO1, Src->getName()));
+ Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
NewSrc->setIsInBounds(Src->isInBounds());
- auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
+ auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
NewGEP->setIsInBounds(GEP.isInBounds());
return NewGEP;
}
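Reviewer note: most of the remaining hunks in this function are the same mechanical migration, passing the source element type explicitly to CreateGEP / CreateInBoundsGEP / CreateLoad instead of nullptr or a type derived from the pointer operand. A minimal standalone sketch of the explicitly typed builder calls (an illustration only, assuming an LLVM tree of roughly this vintage to compile against; the function and value names are made up):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gep-demo", Ctx);
  IRBuilder<> B(Ctx);

  Type *I32 = B.getInt32Ty();
  FunctionType *FTy = FunctionType::get(I32, {I32->getPointerTo()}, false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "load_third", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);

  Value *Ptr = &*F->arg_begin();
  // The element type is named explicitly rather than inferred from the
  // pointee type of Ptr, which is what eventually allows opaque pointers.
  Value *GEP = B.CreateGEP(I32, Ptr, B.getInt32(2), "idx");
  Value *Val = B.CreateLoad(I32, GEP, "val");
  B.CreateRet(Val);

  verifyFunction(*F, &errs());
  M.print(outs(), nullptr);
  return 0;
}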
@@ -1881,6 +1955,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
+ Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+
if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
HasZeroPointerIndex = C->isZero();
@@ -1894,11 +1970,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (HasZeroPointerIndex) {
if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
// GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ if (CATy->getElementType() == StrippedPtrEltTy) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
GetElementPtrInst *Res = GetElementPtrInst::Create(
- StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
+ StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1911,7 +1987,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
}
- if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
+ if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
// GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
// -> GEP [10 x i8]* X, i32 0, ...
@@ -1934,11 +2010,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(
- nullptr, StrippedPtr, Idx, GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, Idx,
- GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ Idx, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEPType);
}
}
@@ -1947,17 +2024,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Transform things like:
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
- Type *SrcEltTy = StrippedPtrTy->getElementType();
- if (SrcEltTy->isArrayTy() &&
- DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) ==
+ if (StrippedPtrEltTy->isArrayTy() &&
+ DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
DL.getTypeAllocSize(GEPEltType)) {
Type *IdxType = DL.getIndexType(GEPType);
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -1967,11 +2044,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (GEPEltType->isSized() && SrcEltTy->isSized()) {
+ if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
- uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1990,9 +2067,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP may not be "inbounds".
Value *NewGEP =
GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
- GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx,
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ NewIdx, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
@@ -2006,13 +2083,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (GEPEltType->isSized() && SrcEltTy->isSized() &&
- SrcEltTy->isArrayTy()) {
+ if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
+ StrippedPtrEltTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
uint64_t ArrayEltSize =
- DL.getTypeAllocSize(SrcEltTy->getArrayElementType());
+ DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2032,11 +2109,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *IndTy = DL.getIndexType(GEPType);
Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
- Value *NewGEP = GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(
- SrcEltTy, StrippedPtr, Off, GEP.getName())
- : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off,
- GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ Off, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEPType);
@@ -2084,8 +2162,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// constructing an AddrSpaceCastInst
Value *NGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
- : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+ ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
+ : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
NGEP->takeName(&GEP);
// Preserve GEP address space to satisfy users
@@ -2132,8 +2210,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices)
- : Builder.CreateGEP(nullptr, SrcOp, NewIndices);
+ ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+ : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
if (NGEP->getType() == GEPType)
return replaceInstUsesWith(GEP, NGEP);
@@ -2159,7 +2237,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
if (BasePtrOffset.ule(AllocSize)) {
return GetElementPtrInst::CreateInBounds(
- PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
+ GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
+ GEP.getName());
}
}
}
@@ -2296,8 +2375,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::objectsize) {
- ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI,
- /*MustSucceed=*/true);
+ Value *Result =
+ lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
replaceInstUsesWith(*I, Result);
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
@@ -2426,9 +2505,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
- // Insert a new store to null because we cannot modify the CFG here.
- Builder.CreateStore(ConstantInt::getTrue(FI.getContext()),
- UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ // Leave a marker since we can't modify the CFG here.
+ CreateNonTerminatorUnreachable(&FI);
return eraseInstFromFunction(FI);
}
@@ -2618,53 +2696,28 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
makeArrayRef(exti, exte));
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
- // We're extracting from an intrinsic, see if we're the only user, which
- // allows us to simplify multiple result intrinsics to simpler things that
- // just get one value.
- if (II->hasOneUse()) {
- // Check if we're grabbing the overflow bit or the result of a 'with
- // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+ // We're extracting from an overflow intrinsic; see if we're the only user,
+ // which allows us to simplify multiple result intrinsics to simpler
+ // things that just get one value.
+ if (WO->hasOneUse()) {
+ // Check if we're grabbing only the result of a 'with overflow' intrinsic
// and replace it with a traditional binary instruction.
- switch (II->getIntrinsicID()) {
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateAdd(LHS, RHS);
- }
-
- // If the normal result of the add is dead, and the RHS is a constant,
- // we can transform this into a range comparison.
- // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
- if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
- return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
- ConstantExpr::getNot(CI));
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateSub(LHS, RHS);
- }
- break;
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateMul(LHS, RHS);
- }
- break;
- default:
- break;
+ if (*EV.idx_begin() == 0) {
+ Instruction::BinaryOps BinOp = WO->getBinaryOp();
+ Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+ replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
+ eraseInstFromFunction(*WO);
+ return BinaryOperator::Create(BinOp, LHS, RHS);
}
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
+ return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
+ ConstantExpr::getNot(CI));
}
}
if (LoadInst *L = dyn_cast<LoadInst>(Agg))
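Reviewer sketch (not part of the patch): the range-comparison comment kept above generalizes as "an unsigned add of a with a constant C wraps exactly when a ugt ~C" (the comment's C = -4 gives ~C = 3). A small exhaustive i8 check:

#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned c = 0; c < 256; ++c) {
      uint8_t Sum;
      bool Overflow = __builtin_add_overflow((uint8_t)a, (uint8_t)c, &Sum);
      bool RangeCheck = (uint8_t)a > (uint8_t)~(uint8_t)c; // a ugt ~C
      if (Overflow != RangeCheck) {
        std::printf("mismatch at a=%u c=%u\n", a, c);
        return 1;
      }
    }
  std::puts("overflow(a + C) == (a ugt ~C) for all i8 values");
  return 0;
}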
@@ -2687,7 +2740,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Builder.SetInsertPoint(L);
Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
- Instruction *NL = Builder.CreateLoad(GEP);
+ Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
AAMDNodes Nodes;
@@ -3065,9 +3118,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
I->isTerminator())
return false;
- // Do not sink alloca instructions out of the entry block.
- if (isa<AllocaInst>(I) && I->getParent() ==
- &DestBlock->getParent()->getEntryBlock())
+ // Do not sink static or dynamic alloca instructions. Static allocas must
+ // remain in the entry block, and dynamic allocas must not be sunk in between
+ // a stacksave / stackrestore pair, which would incorrectly shorten their
+ // lifetimes.
+ if (isa<AllocaInst>(I))
return false;
// Do not sink into catchswitch blocks.
@@ -3093,13 +3148,35 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
++NumSunkInst;
// Also sink all related debug uses from the source basic block. Otherwise we
- // get debug use before the def.
- SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ // get a debug use before the def. Attempt to salvage debug uses first, to
+ // maximise the range over which variables have a location. If we cannot
+ // salvage a use, mark its location undef: we know it was supposed to receive
+ // a new location here, but that computation has been sunk.
+ SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
findDbgUsers(DbgUsers, I);
- for (auto *DII : DbgUsers) {
+ for (auto *DII : reverse(DbgUsers)) {
if (DII->getParent() == SrcBlock) {
- DII->moveBefore(&*InsertPos);
- LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+ // dbg.value is in the same basic block as the sunk inst, see if we can
+ // salvage it. Clone a new copy of the instruction: on success we need
+ // both salvaged and unsalvaged copies.
+ SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
+ cast<DbgVariableIntrinsic>(DII->clone())};
+
+ if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
+ // We are unable to salvage: sink the cloned dbg.value, and mark the
+ // original as undef, terminating any earlier variable location.
+ LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+ TmpUser[0]->insertBefore(&*InsertPos);
+ Value *Undef = UndefValue::get(I->getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ } else {
+ // We successfully salvaged: place the salvaged dbg.value in the
+ // original location, and move the unmodified dbg.value to sink with
+ // the sunk inst.
+ TmpUser[0]->insertBefore(DII);
+ DII->moveBefore(&*InsertPos);
+ }
}
}
return true;
@@ -3294,7 +3371,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- salvageDebugInfo(*Inst);
+ if (!salvageDebugInfo(*Inst))
+ replaceDbgUsesWithUndef(Inst);
Inst->eraseFromParent();
MadeIRChange = true;
continue;
@@ -3407,7 +3485,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
LoopInfo *LI = nullptr) {
auto &DL = F.getParent()->getDataLayout();
ExpensiveCombines |= EnableExpensiveCombines;
@@ -3437,8 +3516,8 @@ static bool combineInstructionsOverFunction(
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
- InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA,
- AC, TLI, DT, ORE, DL, LI);
+ InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
+ AC, TLI, DT, ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
if (!IC.run())
@@ -3458,8 +3537,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI))
+ BFI, PSI, ExpensiveCombines, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3483,6 +3569,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3499,9 +3587,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
// Optional analyses.
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BlockFrequencyInfo *BFI =
+ (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI);
+ BFI, PSI, ExpensiveCombines, LI);
}
char InstructionCombiningPass::ID = 0;
@@ -3514,6 +3608,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f1558c75cb90..6821e214e921 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1,9 +1,8 @@
//===- AddressSanitizer.cpp - memory error detector -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -25,7 +25,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/IR/Argument.h"
@@ -72,6 +71,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
@@ -94,9 +94,6 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kDynamicShadowSentinel =
std::numeric_limits<uint64_t>::max();
-static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64;
static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF; // < 2G.
static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
@@ -112,6 +109,7 @@ static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+static const uint64_t kEmscriptenShadowOffset = 0;
static const uint64_t kMyriadShadowScale = 5;
static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL;
@@ -275,6 +273,16 @@ static cl::opt<bool> ClInvalidPointerPairs(
cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClInvalidPointerCmp(
+ "asan-detect-invalid-pointer-cmp",
+ cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerSub(
+ "asan-detect-invalid-pointer-sub",
+ cl::desc("Instrument - operations with pointer operands"), cl::Hidden,
+ cl::init(false));
+
static cl::opt<unsigned> ClRealignStack(
"asan-realign-stack",
cl::desc("Realign stack to the value of this flag (power of two)"),
@@ -311,10 +319,10 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
cl::desc("scale of asan shadow mapping"),
cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClMappingOffset(
- "asan-mapping-offset",
- cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden,
- cl::init(0));
+static cl::opt<uint64_t>
+ ClMappingOffset("asan-mapping-offset",
+ cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
@@ -393,87 +401,6 @@ STATISTIC(NumOptimizedAccessesToStackVar,
namespace {
-/// Frontend-provided metadata for source location.
-struct LocationMetadata {
- StringRef Filename;
- int LineNo = 0;
- int ColumnNo = 0;
-
- LocationMetadata() = default;
-
- bool empty() const { return Filename.empty(); }
-
- void parse(MDNode *MDN) {
- assert(MDN->getNumOperands() == 3);
- MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
- Filename = DIFilename->getString();
- LineNo =
- mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
- ColumnNo =
- mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
- }
-};
-
-/// Frontend-provided metadata for global variables.
-class GlobalsMetadata {
-public:
- struct Entry {
- LocationMetadata SourceLoc;
- StringRef Name;
- bool IsDynInit = false;
- bool IsBlacklisted = false;
-
- Entry() = default;
- };
-
- GlobalsMetadata() = default;
-
- void reset() {
- inited_ = false;
- Entries.clear();
- }
-
- void init(Module &M) {
- assert(!inited_);
- inited_ = true;
- NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals) return;
- for (auto MDN : Globals->operands()) {
- // Metadata node contains the global and the fields of "Entry".
- assert(MDN->getNumOperands() == 5);
- auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
- // The optimizer may optimize away a global entirely.
- if (!V) continue;
- auto *StrippedV = V->stripPointerCasts();
- auto *GV = dyn_cast<GlobalVariable>(StrippedV);
- if (!GV) continue;
- // We can already have an entry for GV if it was merged with another
- // global.
- Entry &E = Entries[GV];
- if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
- E.SourceLoc.parse(Loc);
- if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
- E.Name = Name->getString();
- ConstantInt *IsDynInit =
- mdconst::extract<ConstantInt>(MDN->getOperand(3));
- E.IsDynInit |= IsDynInit->isOne();
- ConstantInt *IsBlacklisted =
- mdconst::extract<ConstantInt>(MDN->getOperand(4));
- E.IsBlacklisted |= IsBlacklisted->isOne();
- }
- }
-
- /// Returns metadata entry for a given global.
- Entry get(GlobalVariable *G) const {
- auto Pos = Entries.find(G);
- return (Pos != Entries.end()) ? Pos->second : Entry();
- }
-
-private:
- bool inited_ = false;
- DenseMap<GlobalVariable *, Entry> Entries;
-};
-
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) ADD-or-OR Offset.
/// If InGlobal is true, then
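Reviewer sketch (not part of the patch): the mapping described just above is literally one shift plus one add (or or). The simple ADD case, written out with the 64-bit default offset shown earlier and assuming the default scale of 3 (one shadow byte per 8 application bytes):

#include <cstdint>
#include <cstdio>

// shadow = (mem >> Scale) ADD Offset
static uint64_t memToShadow(uint64_t Addr, uint64_t Offset, int Scale) {
  return (Addr >> Scale) + Offset;
}

int main() {
  const uint64_t kOffset = 1ULL << 44; // kDefaultShadowOffset64 above
  const int kScale = 3;                // assumed default shadow scale
  uint64_t Addr = 0x7fff8000u;         // some application address
  uint64_t Shadow = memToShadow(Addr, kOffset, kScale);
  std::printf("app 0x%llx -> shadow 0x%llx\n", (unsigned long long)Addr,
              (unsigned long long)Shadow);
  // Each shadow byte describes one 8-byte granule: 0 means fully addressable,
  // 1..7 mean only that many leading bytes are addressable, and "negative"
  // values encode the various redzone kinds.
  return 0;
}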
@@ -499,7 +426,6 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le;
bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
- bool IsX86 = TargetTriple.getArch() == Triple::x86;
bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
bool IsMIPS32 = TargetTriple.isMIPS32();
bool IsMIPS64 = TargetTriple.isMIPS64();
@@ -508,6 +434,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsWindows = TargetTriple.isOSWindows();
bool IsFuchsia = TargetTriple.isOSFuchsia();
bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+ bool IsEmscripten = TargetTriple.isOSEmscripten();
ShadowMapping Mapping;
@@ -526,10 +453,11 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
else if (IsNetBSD)
Mapping.Offset = kNetBSD_ShadowOffset32;
else if (IsIOS)
- // If we're targeting iOS and x86, the binary is built for iOS simulator.
- Mapping.Offset = IsX86 ? kIOSSimShadowOffset32 : kIOSShadowOffset32;
+ Mapping.Offset = kDynamicShadowSentinel;
else if (IsWindows)
Mapping.Offset = kWindowsShadowOffset32;
+ else if (IsEmscripten)
+ Mapping.Offset = kEmscriptenShadowOffset;
else if (IsMyriad) {
uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 -
(kMyriadMemorySize32 >> Mapping.Scale));
@@ -566,10 +494,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
} else if (IsMIPS64)
Mapping.Offset = kMIPS64_ShadowOffset64;
else if (IsIOS)
- // If we're targeting iOS and x86, the binary is built for iOS simulator.
- // We are using dynamic shadow offset on the 64-bit devices.
- Mapping.Offset =
- IsX86_64 ? kIOSSimShadowOffset64 : kDynamicShadowSentinel;
+ Mapping.Offset = kDynamicShadowSentinel;
else if (IsAArch64)
Mapping.Offset = kAArch64_ShadowOffset64;
else
@@ -607,27 +532,53 @@ static size_t RedzoneSizeForScale(int MappingScale) {
namespace {
-/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public FunctionPass {
- // Pass identification, replacement for typeid
+/// Module analysis for getting various metadata about the module.
+class ASanGlobalsMetadataWrapperPass : public ModulePass {
+public:
static char ID;
- explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false)
- : FunctionPass(ID), UseAfterScope(UseAfterScope || ClUseAfterScope) {
- this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
- this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ?
- ClEnableKasan : CompileKernel;
- initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
+ ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
+ initializeASanGlobalsMetadataWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMD = GlobalsMetadata(M);
+ return false;
}
StringRef getPassName() const override {
- return "AddressSanitizerFunctionPass";
+ return "ASanGlobalsMetadataWrapperPass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
+
+private:
+ GlobalsMetadata GlobalsMD;
+};
+
+char ASanGlobalsMetadataWrapperPass::ID = 0;
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer {
+ AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseAfterScope = false)
+ : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) {
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
}
uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
@@ -672,14 +623,10 @@ struct AddressSanitizer : public FunctionPass {
Value *SizeArgument, uint32_t Exp);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool runOnFunction(Function &F) override;
+ bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
- bool doInitialization(Module &M) override;
- bool doFinalization(Module &M) override;
-
- DominatorTree &getDominatorTree() const { return *DT; }
private:
friend struct FunctionStackPoisoner;
@@ -715,36 +662,68 @@ private:
bool UseAfterScope;
Type *IntptrTy;
ShadowMapping Mapping;
- DominatorTree *DT;
- Function *AsanHandleNoReturnFunc;
- Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
+ FunctionCallee AsanHandleNoReturnFunc;
+ FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
Constant *AsanShadowGlobal;
// These arrays are indexed by AccessIsWrite, Experiment and log2(AccessSize).
- Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
- Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+ FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
// These arrays are indexed by AccessIsWrite and Experiment.
- Function *AsanErrorCallbackSized[2][2];
- Function *AsanMemoryAccessCallbackSized[2][2];
+ FunctionCallee AsanErrorCallbackSized[2][2];
+ FunctionCallee AsanMemoryAccessCallbackSized[2][2];
- Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
+ FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
InlineAsm *EmptyAsm;
Value *LocalDynamicShadow = nullptr;
GlobalsMetadata GlobalsMD;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
};
-class AddressSanitizerModule : public ModulePass {
+class AddressSanitizerLegacyPass : public FunctionPass {
public:
- // Pass identification, replacement for typeid
static char ID;
- explicit AddressSanitizerModule(bool CompileKernel = false,
- bool Recover = false,
- bool UseGlobalsGC = true,
- bool UseOdrIndicator = false)
- : ModulePass(ID), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ explicit AddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseAfterScope = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {
+ initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "AddressSanitizerFunctionPass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover,
+ UseAfterScope);
+ return ASan.instrumentFunction(F, TLI);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseAfterScope;
+};
+
+class ModuleAddressSanitizer {
+public:
+ ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseGlobalsGC = true, bool UseOdrIndicator = false)
+ : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
// Enable aliases as they should have no downside with ODR indicators.
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
@@ -759,10 +738,15 @@ public:
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel =
ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
}
- bool runOnModule(Module &M) override;
- StringRef getPassName() const override { return "AddressSanitizerModule"; }
+ bool instrumentModule(Module &);
private:
void initializeCallbacks(Module &M);
@@ -810,19 +794,54 @@ private:
LLVMContext *C;
Triple TargetTriple;
ShadowMapping Mapping;
- Function *AsanPoisonGlobals;
- Function *AsanUnpoisonGlobals;
- Function *AsanRegisterGlobals;
- Function *AsanUnregisterGlobals;
- Function *AsanRegisterImageGlobals;
- Function *AsanUnregisterImageGlobals;
- Function *AsanRegisterElfGlobals;
- Function *AsanUnregisterElfGlobals;
+ FunctionCallee AsanPoisonGlobals;
+ FunctionCallee AsanUnpoisonGlobals;
+ FunctionCallee AsanRegisterGlobals;
+ FunctionCallee AsanUnregisterGlobals;
+ FunctionCallee AsanRegisterImageGlobals;
+ FunctionCallee AsanUnregisterImageGlobals;
+ FunctionCallee AsanRegisterElfGlobals;
+ FunctionCallee AsanUnregisterElfGlobals;
Function *AsanCtorFunction = nullptr;
Function *AsanDtorFunction = nullptr;
};
+class ModuleAddressSanitizerLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseGlobalGC = true,
+ bool UseOdrIndicator = false)
+ : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) {
+ initializeModuleAddressSanitizerLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ return ASanModule.instrumentModule(M);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseGlobalGC;
+ bool UseOdrIndicator;
+};
+
// Stack poisoning does not play well with exception handling.
// When an exception is thrown, we essentially bypass the code
// that unpoisons the stack. This is why the run-time library has
@@ -846,11 +865,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
- Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
- *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
- Function *AsanSetShadowFunc[0x100] = {};
- Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
- Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc;
+ FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ FunctionCallee AsanSetShadowFunc[0x100] = {};
+ FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
+ FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
@@ -861,6 +880,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
};
SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
+ bool HasUntracedLifetimeIntrinsic = false;
SmallVector<AllocaInst *, 1> DynamicAllocaVec;
SmallVector<IntrinsicInst *, 1> StackRestoreVec;
@@ -876,13 +896,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
- : F(F),
- ASan(ASan),
- DIB(*F.getParent(), /*AllowUnresolved*/ false),
- C(ASan.C),
- IntptrTy(ASan.IntptrTy),
- IntptrPtrTy(PointerType::get(IntptrTy, 0)),
- Mapping(ASan.Mapping),
+ : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C), IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
StackAlignment(1 << Mapping.Scale),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
@@ -899,6 +915,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
initializeCallbacks(*F.getParent());
+ if (HasUntracedLifetimeIntrinsic) {
+ // If there are lifetime intrinsics which couldn't be traced back to an
+ // alloca, we may not know exactly when a variable enters scope, and
+ // therefore should "fail safe" by not poisoning them.
+ StaticAllocaPoisonCallVec.clear();
+ DynamicAllocaPoisonCallVec.clear();
+ }
+
processDynamicAllocas();
processStaticAllocas();
@@ -950,8 +974,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
DynamicAreaOffset);
}
- IRB.CreateCall(AsanAllocasUnpoisonFunc,
- {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr});
+ IRB.CreateCall(
+ AsanAllocasUnpoisonFunc,
+ {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr});
}
// Unpoison dynamic allocas redzones.
@@ -1018,8 +1043,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
!ConstantInt::isValueValidForType(IntptrTy, SizeValue))
return;
// Find alloca instruction that corresponds to llvm.lifetime argument.
- AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
- if (!AI || !ASan.isInterestingAlloca(*AI))
+ AllocaInst *AI =
+ llvm::findAllocaForValue(II.getArgOperand(1), AllocaForValue);
+ if (!AI) {
+ HasUntracedLifetimeIntrinsic = true;
+ return;
+ }
+ // We're interested only in allocas we can handle.
+ if (!ASan.isInterestingAlloca(*AI))
return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
@@ -1042,16 +1073,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ---------------------- Helpers.
void initializeCallbacks(Module &M);
- bool doesDominateAllExits(const Instruction *I) const {
- for (auto Ret : RetVec) {
- if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
- }
- return true;
- }
-
- /// Finds alloca where the value comes from.
- AllocaInst *findAllocaForValue(Value *V);
-
// Copies bytes from ShadowBytes into shadow memory for indexes where
// ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
// ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
@@ -1074,16 +1095,111 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // end anonymous namespace
-char AddressSanitizer::ID = 0;
+void LocationMetadata::parse(MDNode *MDN) {
+ assert(MDN->getNumOperands() == 3);
+ MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = DIFilename->getString();
+ LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+ ColumnNo =
+ mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+}
+
+// FIXME: It would be cleaner to attach the relevant metadata directly to the
+// globals we want to sanitize and read it on each pass over a function,
+// instead of reading module-level metadata up front.
+GlobalsMetadata::GlobalsMetadata(Module &M) {
+ NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+ if (!Globals)
+ return;
+ for (auto MDN : Globals->operands()) {
+ // Metadata node contains the global and the fields of "Entry".
+ assert(MDN->getNumOperands() == 5);
+ auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
+ // The optimizer may optimize away a global entirely.
+ if (!V)
+ continue;
+ auto *StrippedV = V->stripPointerCasts();
+ auto *GV = dyn_cast<GlobalVariable>(StrippedV);
+ if (!GV)
+ continue;
+ // We can already have an entry for GV if it was merged with another
+ // global.
+ Entry &E = Entries[GV];
+ if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
+ E.SourceLoc.parse(Loc);
+ if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
+ E.Name = Name->getString();
+ ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
+ E.IsDynInit |= IsDynInit->isOne();
+ ConstantInt *IsBlacklisted =
+ mdconst::extract<ConstantInt>(MDN->getOperand(4));
+ E.IsBlacklisted |= IsBlacklisted->isOne();
+ }
+}
+
+AnalysisKey ASanGlobalsMetadataAnalysis::Key;
+
+GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ return GlobalsMetadata(M);
+}
+
+AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover,
+ bool UseAfterScope)
+ : CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {}
+
+PreservedAnalyses AddressSanitizerPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ Module &M = *F.getParent();
+ if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
+ const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope);
+ if (Sanitizer.instrumentFunction(F, TLI))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+ }
+
+ report_fatal_error(
+ "The ASanGlobalsMetadataAnalysis is required to run before "
+ "AddressSanitizer can run");
+ return PreservedAnalyses::all();
+}
+
+ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
+ bool Recover,
+ bool UseGlobalGC,
+ bool UseOdrIndicator)
+ : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+ UseOdrIndicator(UseOdrIndicator) {}
+
+PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+ GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
+ ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ if (Sanitizer.instrumentModule(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
+ "Read metadata to mark which globals should be instrumented "
+ "when running ASan.",
+ false, true)
+
+char AddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- AddressSanitizer, "asan",
+ AddressSanitizerLegacyPass, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
- AddressSanitizer, "asan",
+ AddressSanitizerLegacyPass, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
@@ -1091,24 +1207,22 @@ FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
bool Recover,
bool UseAfterScope) {
assert(!CompileKernel || Recover);
- return new AddressSanitizer(CompileKernel, Recover, UseAfterScope);
+ return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope);
}
-char AddressSanitizerModule::ID = 0;
+char ModuleAddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS(
- AddressSanitizerModule, "asan-module",
+ ModuleAddressSanitizerLegacyPass, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass",
false, false)
-ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
- bool Recover,
- bool UseGlobalsGC,
- bool UseOdrIndicator) {
+ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
+ bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC,
- UseOdrIndicator);
+ return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover,
+ UseGlobalsGC, UseOdrIndicator);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1312,11 +1426,24 @@ static bool isPointerOperand(Value *V) {
// This is a rough heuristic; it may cause both false positives and
// false negatives. The proper implementation requires cooperation with
// the frontend.
-static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
+static bool isInterestingPointerComparison(Instruction *I) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
- if (!Cmp->isRelational()) return false;
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() != Instruction::Sub) return false;
+ if (!Cmp->isRelational())
+ return false;
+ } else {
+ return false;
+ }
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerSubtraction(Instruction *I) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() != Instruction::Sub)
+ return false;
} else {
return false;
}
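Reviewer sketch (not part of the patch): the two predicates above only decide which instructions get instrumented; the interesting cases at run time are comparisons and subtractions of pointers into unrelated objects. An example of the kind of source the split options target (the matching clang driver spellings are, as far as I recall, -fsanitize=pointer-compare and -fsanitize=pointer-subtract on top of -fsanitize=address):

#include <cstddef>
#include <cstdio>

int main() {
  int A[4] = {0, 1, 2, 3};
  int B[4] = {4, 5, 6, 7};
  int *P = &A[1];
  int *Q = &B[2];
  bool Less = P < Q;           // relational compare of unrelated pointers (UB)
  std::ptrdiff_t Diff = Q - P; // subtraction of unrelated pointers (UB)
  std::printf("%d %td\n", (int)Less, Diff);
  return 0;
}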
@@ -1328,13 +1455,16 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have initializer
// at all, we assume it has dynamic initializer (in other TU).
+ //
+ // FIXME: Metadata should be attached directly to the global instead of being
+ // added to llvm.asan.globals.
return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
Instruction *I) {
IRBuilder<> IRB(I);
- Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+ FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
for (Value *&i : Param) {
if (i->getType()->isPointerTy())
@@ -1392,7 +1522,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
IRBuilder<> IRB(InsertBefore);
InstrumentedAddress =
- IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
Granularity, ElemTypeSize, IsWrite, SizeArgument,
UseCalls, Exp);
@@ -1553,7 +1683,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
Value *ShadowValue =
- IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1ULL << Mapping.Scale;
@@ -1612,7 +1742,7 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
}
}
-void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
+void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
IRBuilder<> IRB(&GlobalInit.front(),
@@ -1628,7 +1758,7 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
CallInst::Create(AsanUnpoisonGlobals, "", RI);
}
-void AddressSanitizerModule::createInitializerPoisonCalls(
+void ModuleAddressSanitizer::createInitializerPoisonCalls(
Module &M, GlobalValue *ModuleName) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
@@ -1653,10 +1783,12 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
}
}
-bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
+bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = G->getValueType();
LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+ // FIXME: Metadata should be attached directly to the global instead of being
+ // added to llvm.asan.globals.
if (GlobalsMD.get(G).IsBlacklisted) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
@@ -1768,7 +1900,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// On Mach-O platforms, we emit global metadata in a separate section of the
// binary in order to allow the linker to properly dead strip. This is only
// supported on recent versions of ld64.
-bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
+bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
if (!TargetTriple.isOSBinFormatMachO())
return false;
@@ -1782,7 +1914,7 @@ bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
return false;
}
-StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
+StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
switch (TargetTriple.getObjectFormat()) {
case Triple::COFF: return ".ASAN$GL";
case Triple::ELF: return "asan_globals";
@@ -1792,52 +1924,39 @@ StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
llvm_unreachable("unsupported object format");
}
-void AddressSanitizerModule::initializeCallbacks(Module &M) {
+void ModuleAddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
- AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonGlobalsName, IRB.getVoidTy()));
- AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+ AsanPoisonGlobals =
+ M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnpoisonGlobals =
+ M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy());
// Declare functions that register/unregister globals.
- AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
- AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanRegisterGlobals = M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnregisterGlobals = M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
// Declare the functions that find globals in a shared object and then invoke
// the (un)register function on them.
- AsanRegisterImageGlobals =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
-
- AsanUnregisterImageGlobals =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+ AsanRegisterImageGlobals = M.getOrInsertFunction(
+ kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnregisterImageGlobals = M.getOrInsertFunction(
+ kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
- AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+ AsanRegisterElfGlobals =
M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy));
- AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
-
- AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+ IntptrTy, IntptrTy, IntptrTy);
+ AsanUnregisterElfGlobals =
M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy));
- AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
+ IntptrTy, IntptrTy, IntptrTy);
}
// Put the metadata and the instrumented global in the same group. This ensures
// that the metadata is discarded if the instrumented global is discarded.
-void AddressSanitizerModule::SetComdatForGlobalMetadata(
+void ModuleAddressSanitizer::SetComdatForGlobalMetadata(
GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
Module &M = *G->getParent();
Comdat *C = G->getComdat();
@@ -1875,7 +1994,7 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
// Create a separate metadata global and put it in the appropriate ASan
// global registration section.
GlobalVariable *
-AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
+ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
StringRef OriginalName) {
auto Linkage = TargetTriple.isOSBinFormatMachO()
? GlobalVariable::InternalLinkage
@@ -1887,7 +2006,7 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
return Metadata;
}
-IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
+IRBuilder<> ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
AsanDtorFunction =
Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
@@ -1896,7 +2015,7 @@ IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
}
-void AddressSanitizerModule::InstrumentGlobalsCOFF(
+void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -1920,7 +2039,7 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(
}
}
-void AddressSanitizerModule::InstrumentGlobalsELF(
+void ModuleAddressSanitizer::InstrumentGlobalsELF(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers,
const std::string &UniqueModuleId) {
@@ -1979,7 +2098,7 @@ void AddressSanitizerModule::InstrumentGlobalsELF(
IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
}
-void AddressSanitizerModule::InstrumentGlobalsMachO(
+void ModuleAddressSanitizer::InstrumentGlobalsMachO(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2036,7 +2155,7 @@ void AddressSanitizerModule::InstrumentGlobalsMachO(
{IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
}
-void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
+void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2070,9 +2189,9 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
// redzones and inserts this function into llvm.global_ctors.
// Sets *CtorComdat to true if the global registration code emitted into the
// asan constructor is comdat-compatible.
-bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) {
+bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+ bool *CtorComdat) {
*CtorComdat = false;
- GlobalsMD.init(M);
SmallVector<GlobalVariable *, 16> GlobalsToChange;
@@ -2115,6 +2234,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
+    // FIXME: Metadata should be attached directly to the global instead of
+    // being added to llvm.asan.globals.
auto MD = GlobalsMD.get(G);
StringRef NameForGlobal = G->getName();
// Create string holding the global name (use global name from metadata
@@ -2271,7 +2392,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
return true;
}
-int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
+int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
int LongSize = M.getDataLayout().getPointerSizeInBits();
bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
int Version = 8;
@@ -2281,12 +2402,7 @@ int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
return Version;
}
-bool AddressSanitizerModule::runOnModule(Module &M) {
- C = &(M.getContext());
- int LongSize = M.getDataLayout().getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- TargetTriple = Triple(M.getTargetTriple());
- Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
+bool ModuleAddressSanitizer::instrumentModule(Module &M) {
initializeCallbacks(M);
if (CompileKernel)
@@ -2346,51 +2462,49 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
Args2.push_back(ExpType);
Args1.push_back(ExpType);
}
- AsanErrorCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args2, false)));
+ AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
- AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args2, false)));
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args1, false)));
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args1, false)));
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
}
}
}
const std::string MemIntrinCallbackPrefix =
CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
- AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
- AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy()));
-
- AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy));
+ AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ AsanHandleNoReturnFunc =
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
+
+ AsanPtrCmpFunction =
+ M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanPtrSubFunction =
+ M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy);
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -2400,25 +2514,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
ArrayType::get(IRB.getInt8Ty(), 0));
}
-// virtual
-bool AddressSanitizer::doInitialization(Module &M) {
- // Initialize the private fields. No one has accessed them before.
- GlobalsMD.init(M);
-
- C = &(M.getContext());
- LongSize = M.getDataLayout().getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- TargetTriple = Triple(M.getTargetTriple());
-
- Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
- return true;
-}
-
-bool AddressSanitizer::doFinalization(Module &M) {
- GlobalsMD.reset();
- return false;
-}
-
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -2428,7 +2523,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- Function *AsanInitFunction =
+ FunctionCallee AsanInitFunction =
declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
@@ -2460,7 +2555,7 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
} else {
Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
kAsanShadowMemoryDynamicAddress, IntptrTy);
- LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+ LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
}
}
@@ -2492,7 +2587,8 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
}
-bool AddressSanitizer::runOnFunction(Function &F) {
+bool AddressSanitizer::instrumentFunction(Function &F,
+ const TargetLibraryInfo *TLI) {
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
if (F.getName().startswith("__asan_")) return false;
@@ -2511,7 +2607,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
FunctionStateRAII CleanupObj(this);
@@ -2532,8 +2627,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
bool IsWrite;
unsigned Alignment;
uint64_t TypeSize;
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -2557,8 +2650,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
continue; // We've seen this temp in the current BB.
}
}
- } else if (ClInvalidPointerPairs &&
- isInterestingPointerComparisonOrSubtraction(&Inst)) {
+ } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) &&
+ isInterestingPointerComparison(&Inst)) ||
+ ((ClInvalidPointerPairs || ClInvalidPointerSub) &&
+ isInterestingPointerSubtraction(&Inst))) {
PointerComparisonsOrSubtracts.push_back(&Inst);
continue;
} else if (isa<MemIntrinsic>(Inst)) {
@@ -2569,7 +2664,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
+ if (CS.doesNotReturn() && !CS->getMetadata("nosanitize"))
+ NoReturnCalls.push_back(CS.getInstruction());
}
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
@@ -2606,7 +2702,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
FunctionStackPoisoner FSP(F, *this);
bool ChangedStack = FSP.runOnFunction();
- // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+ // We must unpoison the stack before NoReturn calls (throw, _exit, etc).
// See e.g. https://github.com/google/sanitizers/issues/37
for (auto CI : NoReturnCalls) {
IRBuilder<> IRB(CI);
@@ -2643,20 +2739,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
std::string Suffix = itostr(i);
- AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
- IntptrTy));
- AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
+ AsanStackMallocFunc[i] = M.getOrInsertFunction(
+ kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy);
+ AsanStackFreeFunc[i] =
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, IntptrTy));
+ IRB.getVoidTy(), IntptrTy, IntptrTy);
}
if (ASan.UseAfterScope) {
- AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
- AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
+ AsanPoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
}
for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
@@ -2664,15 +2757,13 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
Name << kAsanSetShadowPrefix;
Name << std::setw(2) << std::setfill('0') << std::hex << Val;
AsanSetShadowFunc[Val] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy));
+ M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
}
- AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanAllocasUnpoisonFunc =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
+ AsanAllocaPoisonFunc = M.getOrInsertFunction(
+ kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanAllocasUnpoisonFunc = M.getOrInsertFunction(
+ kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
}
void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
@@ -2958,7 +3049,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
Value *FakeStack;
Value *LocalStackBase;
Value *LocalStackBaseAlloca;
- bool Deref;
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
if (DoStackMalloc) {
LocalStackBaseAlloca =
@@ -2969,9 +3060,9 @@ void FunctionStackPoisoner::processStaticAllocas() {
// void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize);
Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal(
kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty());
- Value *UseAfterReturnIsEnabled =
- IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUseAfterReturn),
- Constant::getNullValue(IRB.getInt32Ty()));
+ Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(
+ IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn),
+ Constant::getNullValue(IRB.getInt32Ty()));
Instruction *Term =
SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
IRBuilder<> IRBIf(Term);
@@ -2999,7 +3090,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
IRB.SetCurrentDebugLocation(EntryDebugLocation);
IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
- Deref = true;
+ DIExprFlags |= DIExpression::DerefBefore;
} else {
// void *FakeStack = nullptr;
// void *LocalStackBase = alloca(LocalStackSize);
@@ -3007,14 +3098,13 @@ void FunctionStackPoisoner::processStaticAllocas() {
LocalStackBase =
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
LocalStackBaseAlloca = LocalStackBase;
- Deref = false;
}
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
- replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, Deref,
- Desc.Offset, DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, DIExprFlags,
+ Desc.Offset);
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
@@ -3105,7 +3195,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
FakeStack,
ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
Value *SavedFlagPtr = IRBPoison.CreateLoad(
- IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
IRBPoison.CreateStore(
Constant::getNullValue(IRBPoison.getInt8Ty()),
IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
@@ -3145,41 +3235,6 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// variable may go in and out of scope several times, e.g. in loops).
// (3) if we poisoned at least one %alloca in a function,
// unpoison the whole stack frame at function exit.
-
-AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- // We're interested only in allocas we can handle.
- return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
- // See if we've already calculated (or started to calculate) alloca for a
- // given value.
- AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end()) return I->second;
- // Store 0 while we're calculating alloca for value V to avoid
- // infinite recursion if the value references itself.
- AllocaForValue[V] = nullptr;
- AllocaInst *Res = nullptr;
- if (CastInst *CI = dyn_cast<CastInst>(V))
- Res = findAllocaForValue(CI->getOperand(0));
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : PN->incoming_values()) {
- // Allow self-referencing phi-nodes.
- if (IncValue == PN) continue;
- AllocaInst *IncValueAI = findAllocaForValue(IncValue);
- // AI for incoming values should exist and should all be equal.
- if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
- return nullptr;
- Res = IncValueAI;
- }
- } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
- Res = findAllocaForValue(EP->getPointerOperand());
- } else {
- LLVM_DEBUG(dbgs() << "Alloca search canceled on unknown instruction: " << *V
- << "\n");
- }
- if (Res) AllocaForValue[V] = Res;
- return Res;
-}
-
void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
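
Two mechanical changes repeat throughout the AddressSanitizer hunks above: runtime callbacks are now kept as the FunctionCallee returned by Module::getOrInsertFunction (the checkSanitizerInterfaceFunction wrapper and manual linkage fix-ups go away), and loads spell out their result type. A minimal sketch of both patterns, using a hypothetical callback name rather than a real __asan_* entry point:

// Sketch only; assumes the LLVM-9-era FunctionCallee API this patch moves to.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static FunctionCallee declareExampleCallback(Module &M, Type *IntptrTy) {
  IRBuilder<> IRB(M.getContext());
  // getOrInsertFunction returns a {FunctionType, Callee} pair directly, so no
  // cast or checkSanitizerInterfaceFunction() call is needed any more.
  return M.getOrInsertFunction("__example_callback", IRB.getVoidTy(), IntptrTy);
}

static Value *loadWord(IRBuilder<> &IRB, Type *IntptrTy, Value *Addr) {
  // CreateLoad now names the result type instead of deriving it from the
  // pointer operand, matching the CreateLoad(Ty, Ptr) changes above.
  return IRB.CreateLoad(IntptrTy, Addr);
}
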
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index a0c78e0468c6..4dc9b611c156 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -1,9 +1,8 @@
//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -143,8 +142,9 @@ static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
ScalarEvolution &SE) {
const DataLayout &DL = F.getParent()->getDataLayout();
- ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(),
- /*RoundToAlign=*/true);
+ ObjectSizeOpts EvalOpts;
+ EvalOpts.RoundToAlign = true;
+ ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
// touching instructions
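
The BoundsChecking change above replaces the old RoundToAlign constructor flag with an ObjectSizeOpts options struct. A small illustrative sketch of that construction, not the pass itself:

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

static void buildEvaluator(const DataLayout &DL, const TargetLibraryInfo &TLI,
                           LLVMContext &Ctx) {
  // RoundToAlign is now a field of ObjectSizeOpts rather than a bare bool
  // passed to the ObjectSizeOffsetEvaluator constructor.
  ObjectSizeOpts EvalOpts;
  EvalOpts.RoundToAlign = true;
  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, Ctx, EvalOpts);
  (void)ObjSizeEval; // Queries would go through ObjSizeEval.compute(Ptr).
}
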
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
index e178ef386e68..971e00041762 100644
--- a/lib/Transforms/Instrumentation/CFGMST.h
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -1,9 +1,8 @@
//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -196,11 +195,10 @@ public:
// Sort CFG edges based on its weight.
void sortEdgesByWeight() {
- std::stable_sort(AllEdges.begin(), AllEdges.end(),
- [](const std::unique_ptr<Edge> &Edge1,
- const std::unique_ptr<Edge> &Edge2) {
- return Edge1->Weight > Edge2->Weight;
- });
+ llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
+ const std::unique_ptr<Edge> &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
}
// Traverse all the edges and compute the Minimum Weight Spanning Tree
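
The sortEdgesByWeight rewrite above uses the range-based llvm::stable_sort from STLExtras, which takes the container itself rather than an iterator pair. A standalone sketch with a stand-in Edge type:

#include "llvm/ADT/STLExtras.h"
#include <cstdint>
#include <memory>
#include <vector>

struct Edge {
  uint64_t Weight;
};

static void sortEdgesByWeight(std::vector<std::unique_ptr<Edge>> &AllEdges) {
  // Range form: no AllEdges.begin()/AllEdges.end() boilerplate.
  llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
                                 const std::unique_ptr<Edge> &Edge2) {
    return Edge1->Weight > Edge2->Weight; // Heavier edges sort first.
  });
}
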
diff --git a/lib/Transforms/Instrumentation/CGProfile.cpp b/lib/Transforms/Instrumentation/CGProfile.cpp
index cdcd01726906..358abab3cceb 100644
--- a/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -1,9 +1,8 @@
//===-- CGProfile.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 1ada0b713092..3f4f9bc7145d 100644
--- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1,9 +1,8 @@
//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -547,19 +546,25 @@ static std::set<Value *> getBaseValues(Value *V,
static bool
checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
DenseSet<Instruction *> &Unhoistables,
- DenseSet<Instruction *> *HoistStops) {
+ DenseSet<Instruction *> *HoistStops,
+ DenseMap<Instruction *, bool> &Visited) {
assert(InsertPoint && "Null InsertPoint");
if (auto *I = dyn_cast<Instruction>(V)) {
+ if (Visited.count(I)) {
+ return Visited[I];
+ }
assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
if (Unhoistables.count(I)) {
// Don't hoist if they are not to be hoisted.
+ Visited[I] = false;
return false;
}
if (DT.dominates(I, InsertPoint)) {
// We are already above the insert point. Stop here.
if (HoistStops)
HoistStops->insert(I);
+ Visited[I] = true;
return true;
}
// We aren't above the insert point; check if we can hoist it above the
@@ -569,7 +574,8 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
DenseSet<Instruction *> OpsHoistStops;
bool AllOpsHoisted = true;
for (Value *Op : I->operands()) {
- if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
+ Visited)) {
AllOpsHoisted = false;
break;
}
@@ -578,9 +584,11 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
if (HoistStops)
HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ Visited[I] = true;
return true;
}
}
+ Visited[I] = false;
return false;
}
// Non-instructions are considered hoistable.
@@ -893,8 +901,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
++it;
continue;
}
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr);
+ DT, Unhoistables, nullptr, Visited);
if (!IsHoistable) {
CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
ORE.emit([&]() {
@@ -913,8 +922,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
InsertPoint = getBranchInsertPoint(RI);
CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
if (RI.HasBranch && InsertPoint != Branch) {
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr);
+ DT, Unhoistables, nullptr, Visited);
if (!IsHoistable) {
// If the branch isn't hoistable, drop the selects in the entry
// block, preferring the branch, which makes the branch the hoist
@@ -945,15 +955,17 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
if (RI.HasBranch) {
assert(!DT.dominates(Branch, InsertPoint) &&
"Branch can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
assert(checkHoistValue(Branch->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr) &&
+ DT, Unhoistables, nullptr, Visited) &&
"checkHoistValue for branch");
}
for (auto *SI : Selects) {
assert(!DT.dominates(SI, InsertPoint) &&
"SI can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
- Unhoistables, nullptr) &&
+ Unhoistables, nullptr, Visited) &&
"checkHoistValue for selects");
}
CHR_DEBUG(dbgs() << "Result\n");
@@ -1054,7 +1066,8 @@ static bool shouldSplit(Instruction *InsertPoint,
assert(InsertPoint && "Null InsertPoint");
// If any of Bases isn't hoistable to the hoist point, split.
for (Value *V : ConditionValues) {
- if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ DenseMap<Instruction *, bool> Visited;
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
return true; // Not hoistable, split.
}
@@ -1383,8 +1396,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
"Must be truthy or falsy");
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
// Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
- Unhoistables, &HoistStops);
+ Unhoistables, &HoistStops, Visited);
assert(IsHoistable && "Must be hoistable");
(void)(IsHoistable); // Unused in release build
IsHoisted = true;
@@ -1394,8 +1408,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
"Must be true or false biased");
// Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
- Unhoistables, &HoistStops);
+ Unhoistables, &HoistStops, Visited);
assert(IsHoistable && "Must be hoistable");
(void)(IsHoistable); // Unused in release build
IsHoisted = true;
@@ -1417,7 +1432,7 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output) {
Output.resize(Input.size());
llvm::copy(Input, Output.begin());
- std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+ llvm::stable_sort(Output, CHRScopeSorter);
}
// Return true if V is already hoisted or was hoisted (along with its operands)
@@ -1425,7 +1440,8 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
HoistStopMapTy &HoistStopMap,
DenseSet<Instruction *> &HoistedSet,
- DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
auto IT = HoistStopMap.find(R);
assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
DenseSet<Instruction *> &HoistStops = IT->second;
@@ -1445,8 +1461,21 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
// Already hoisted, return.
return;
assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ assert(DT.getNode(I->getParent()) && "DT must contain I's block");
+ assert(DT.getNode(HoistPoint->getParent()) &&
+ "DT must contain HoistPoint block");
+ if (DT.dominates(I, HoistPoint))
+ // We are already above the hoist point. Stop here. This may be necessary
+ // when multiple scopes would independently hoist the same
+ // instruction. Since an outer (dominating) scope would hoist it to its
+  // entry before an inner (dominated) scope would hoist it to its own entry,
+  // the inner scope may see the instruction already hoisted. In that case,
+  // hoisting it again from the inner scope would be wrong and could create
+  // bad IR (a non-dominating def); it is safe to skip the hoist instead because
+ // it's already in a block that dominates the inner scope.
+ return;
for (Value *Op : I->operands()) {
- hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
}
I->moveBefore(HoistPoint);
HoistedSet.insert(I);
@@ -1457,7 +1486,8 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
// Hoist the dependent condition values of the branches and the selects in the
// scope to the insert point.
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
- DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
DenseSet<Instruction *> HoistedSet;
for (const RegInfo &RI : Scope->CHRRegions) {
Region *R = RI.R;
@@ -1466,7 +1496,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
- HoistedSet, TrivialPHIs);
+ HoistedSet, TrivialPHIs, DT);
}
for (SelectInst *SI : RI.Selects) {
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
@@ -1474,7 +1504,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
if (!(IsTrueBiased || IsFalseBiased))
continue;
hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
- HoistedSet, TrivialPHIs);
+ HoistedSet, TrivialPHIs, DT);
}
}
}
@@ -1708,7 +1738,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
#endif
// Hoist the conditional values of the branches/selects.
- hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
#ifndef NDEBUG
assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
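
The ControlHeightReduction changes above thread a DenseMap<Instruction *, bool> Visited through checkHoistValue so each instruction's hoistability verdict is computed once per query. A simplified, standalone sketch of that memoization shape (not the pass's actual hoistability rules):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool isHoistable(Value *V, DenseMap<Instruction *, bool> &Visited) {
  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return true; // Constants and arguments are trivially hoistable.
  auto It = Visited.find(I);
  if (It != Visited.end())
    return It->second; // Reuse the cached verdict for shared operands.
  // Provisionally mark the instruction to guard against cycles through PHIs.
  Visited[I] = false;
  bool Result = true;
  for (Value *Op : I->operands())
    if (!isHoistable(Op, Visited)) {
      Result = false;
      break;
    }
  return Visited[I] = Result;
}
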
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 4c3c6c9added..2279c1bcb6a8 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1,9 +1,8 @@
//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -333,6 +332,8 @@ class DataFlowSanitizer : public ModulePass {
Constant *RetvalTLS;
void *(*GetArgTLSPtr)();
void *(*GetRetvalTLSPtr)();
+ FunctionType *GetArgTLSTy;
+ FunctionType *GetRetvalTLSTy;
Constant *GetArgTLS;
Constant *GetRetvalTLS;
Constant *ExternalShadowMask;
@@ -342,13 +343,13 @@ class DataFlowSanitizer : public ModulePass {
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
FunctionType *DFSanVarargWrapperFnTy;
- Constant *DFSanUnionFn;
- Constant *DFSanCheckedUnionFn;
- Constant *DFSanUnionLoadFn;
- Constant *DFSanUnimplementedFn;
- Constant *DFSanSetLabelFn;
- Constant *DFSanNonzeroLabelFn;
- Constant *DFSanVarargWrapperFn;
+ FunctionCallee DFSanUnionFn;
+ FunctionCallee DFSanCheckedUnionFn;
+ FunctionCallee DFSanUnionLoadFn;
+ FunctionCallee DFSanUnimplementedFn;
+ FunctionCallee DFSanSetLabelFn;
+ FunctionCallee DFSanNonzeroLabelFn;
+ FunctionCallee DFSanVarargWrapperFn;
MDNode *ColdCallWeights;
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
@@ -436,6 +437,7 @@ public:
}
void visitOperandShadowInst(Instruction &I);
+ void visitUnaryOperator(UnaryOperator &UO);
void visitBinaryOperator(BinaryOperator &BO);
void visitCastInst(CastInst &CI);
void visitCmpInst(CmpInst &CI);
@@ -581,17 +583,17 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
if (GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
ArgTLS = nullptr;
+ GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
GetArgTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
- PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ArgTLSTy), false)));
+ PointerType::getUnqual(GetArgTLSTy));
}
if (GetRetvalTLSPtr) {
RetvalTLS = nullptr;
+ GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
GetRetvalTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
- PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ShadowTy), false)));
+ PointerType::getUnqual(GetRetvalTLSTy));
}
ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
@@ -678,8 +680,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
StringRef FName) {
FunctionType *FTT = getTrampolineFunctionType(FT);
- Constant *C = Mod->getOrInsertFunction(FName, FTT);
- Function *F = dyn_cast<Function>(C);
+ FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C.getCallee());
if (F && F->isDeclaration()) {
F->setLinkage(GlobalValue::LinkOnceODRLinkage);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
@@ -687,7 +689,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
Function::arg_iterator AI = F->arg_begin(); ++AI;
for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
Args.push_back(&*AI);
- CallInst *CI = CallInst::Create(&*F->arg_begin(), Args, "", BB);
+ CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
ReturnInst *RI;
if (FT->getReturnType()->isVoidTy())
RI = ReturnInst::Create(*Ctx, BB);
@@ -704,7 +706,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
&*std::prev(F->arg_end()), RI);
}
- return C;
+ return cast<Constant>(C.getCallee());
}
bool DataFlowSanitizer::runOnModule(Module &M) {
@@ -726,35 +728,51 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
ExternalShadowMask =
Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
- DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
- if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
- F->addParamAttr(0, Attribute::ZExt);
- F->addParamAttr(1, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanUnionFn =
+ Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
}
- DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
- if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
- F->addParamAttr(0, Attribute::ZExt);
- F->addParamAttr(1, Attribute::ZExt);
+
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanCheckedUnionFn =
+ Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
}
- DFSanUnionLoadFn =
- Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
- if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
}
DFSanUnimplementedFn =
Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
- DFSanSetLabelFn =
- Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
- if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
- F->addParamAttr(0, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
}
DFSanNonzeroLabelFn =
Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
@@ -765,13 +783,13 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
SmallPtrSet<Function *, 2> FnsWithNativeABI;
for (Function &i : M) {
if (!i.isIntrinsic() &&
- &i != DFSanUnionFn &&
- &i != DFSanCheckedUnionFn &&
- &i != DFSanUnionLoadFn &&
- &i != DFSanUnimplementedFn &&
- &i != DFSanSetLabelFn &&
- &i != DFSanNonzeroLabelFn &&
- &i != DFSanVarargWrapperFn)
+ &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
FnsToInstrument.push_back(&i);
}
@@ -982,7 +1000,7 @@ Value *DFSanFunction::getArgTLSPtr() {
return ArgTLSPtr = DFS.ArgTLS;
IRBuilder<> IRB(&F->getEntryBlock().front());
- return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
}
Value *DFSanFunction::getRetvalTLS() {
@@ -992,12 +1010,14 @@ Value *DFSanFunction::getRetvalTLS() {
return RetvalTLSPtr = DFS.RetvalTLS;
IRBuilder<> IRB(&F->getEntryBlock().front());
- return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
+ return RetvalTLSPtr =
+ IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
}
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
IRBuilder<> IRB(Pos);
- return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+ return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
+ getArgTLSPtr(), 0, Idx);
}
Value *DFSanFunction::getShadow(Value *V) {
@@ -1015,7 +1035,8 @@ Value *DFSanFunction::getShadow(Value *V) {
DFS.ArgTLS ? &*F->getEntryBlock().begin()
: cast<Instruction>(ArgTLSPtr)->getNextNode();
IRBuilder<> IRB(ArgTLSPos);
- Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+ Shadow =
+ IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
break;
}
case DataFlowSanitizer::IA_Args: {
@@ -1165,15 +1186,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
const auto i = AllocaShadowMap.find(AI);
if (i != AllocaShadowMap.end()) {
IRBuilder<> IRB(Pos);
- return IRB.CreateLoad(i->second);
+ return IRB.CreateLoad(DFS.ShadowTy, i->second);
}
}
uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
- SmallVector<Value *, 2> Objs;
+ SmallVector<const Value *, 2> Objs;
GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
bool AllConstants = true;
- for (Value *Obj : Objs) {
+ for (const Value *Obj : Objs) {
if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
continue;
if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
@@ -1190,7 +1211,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
case 0:
return DFS.ZeroShadow;
case 1: {
- LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+ LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
LI->setAlignment(ShadowAlign);
return LI;
}
@@ -1198,8 +1219,9 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
ConstantInt::get(DFS.IntptrTy, 1));
- return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
- IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
+ return combineShadows(
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
}
}
if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
@@ -1218,7 +1240,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
Value *WideAddr =
IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
- Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *WideShadow =
+ IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
@@ -1251,7 +1274,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> NextIRB(NextBB);
WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
ConstantInt::get(DFS.IntptrTy, 1));
- Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
+ WideAddr, ShadowAlign);
ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
LastBr->setSuccessor(0, NextBB);
LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
@@ -1375,6 +1399,10 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
}
+void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
+ visitOperandShadowInst(UO);
+}
+
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
visitOperandShadowInst(BO);
}
@@ -1470,7 +1498,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
auto *MTI = cast<MemTransferInst>(
- IRB.CreateCall(I.getCalledValue(),
+ IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
{DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
if (ClPreserveAlignment) {
MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
@@ -1513,7 +1541,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
// Calls to this function are synthesized in wrappers, and we shouldn't
// instrument them.
- if (F == DFSF.DFS.DFSanVarargWrapperFn)
+ if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
return;
IRBuilder<> IRB(CS.getInstruction());
@@ -1546,9 +1574,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
std::string CustomFName = "__dfsw_";
CustomFName += F->getName();
- Constant *CustomF = DFSF.DFS.Mod->getOrInsertFunction(
+ FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
CustomFName, CustomFn.TransformedType);
- if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+ if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
CustomFn->copyAttributesFrom(F);
// Custom functions returning non-void will write to the return label.
@@ -1628,7 +1656,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
if (!FT->getReturnType()->isVoidTy()) {
- LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+ LoadInst *LabelLoad =
+ IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
DFSF.setShadow(CustomCI, LabelLoad);
}
@@ -1666,7 +1695,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
IRBuilder<> NextIRB(Next);
- LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
DFSF.SkipInsts.insert(LI);
DFSF.setShadow(CS.getInstruction(), LI);
DFSF.NonZeroChecks.push_back(LI);
@@ -1706,10 +1735,10 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
- Args);
+ NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
+ II->getUnwindDest(), Args);
} else {
- NewCS = IRB.CreateCall(Func, Args);
+ NewCS = IRB.CreateCall(NewFT, Func, Args);
}
NewCS.setCallingConv(CS.getCallingConv());
NewCS.setAttributes(CS.getAttributes().removeAttributes(
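
The DataFlowSanitizer hunks above build an AttributeList up front and pass it to Module::getOrInsertFunction, instead of casting the result to Function* and mutating attributes afterwards. A minimal sketch of that pattern, using a placeholder name rather than a real dfsan entry point:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static FunctionCallee declareRuntimeHelper(Module &M, FunctionType *FnTy) {
  LLVMContext &Ctx = M.getContext();
  AttributeList AL;
  AL = AL.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::NoUnwind);
  AL = AL.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
  AL = AL.addParamAttribute(Ctx, 0, Attribute::ZExt);
  // The attributes ride along with the declaration; the FunctionCallee keeps
  // the callee and its FunctionType together for later CreateCall(FnTy, ...).
  return M.getOrInsertFunction("__example_runtime_helper", FnTy, AL);
}
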
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
deleted file mode 100644
index db438e78ded9..000000000000
--- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ /dev/null
@@ -1,900 +0,0 @@
-//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of EfficiencySanitizer, a family of performance tuners
-// that detects multiple performance issues via separate sub-tools.
-//
-// The instrumentation phase is straightforward:
-// - Take action on every memory access: either inlined instrumentation,
-// or Inserted calls to our run-time library.
-// - Optimizations may apply to avoid instrumenting some of the accesses.
-// - Turn mem{set,cpy,move} instrinsics into library calls.
-// The rest is handled by the run-time library.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "esan"
-
-// The tool type must be just one of these ClTool* options, as the tools
-// cannot be combined due to shadow memory constraints.
-static cl::opt<bool>
- ClToolCacheFrag("esan-cache-frag", cl::init(false),
- cl::desc("Detect data cache fragmentation"), cl::Hidden);
-static cl::opt<bool>
- ClToolWorkingSet("esan-working-set", cl::init(false),
- cl::desc("Measure the working set size"), cl::Hidden);
-// Each new tool will get its own opt flag here.
-// These are converted to EfficiencySanitizerOptions for use
-// in the code.
-
-static cl::opt<bool> ClInstrumentLoadsAndStores(
- "esan-instrument-loads-and-stores", cl::init(true),
- cl::desc("Instrument loads and stores"), cl::Hidden);
-static cl::opt<bool> ClInstrumentMemIntrinsics(
- "esan-instrument-memintrinsics", cl::init(true),
- cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
-static cl::opt<bool> ClInstrumentFastpath(
- "esan-instrument-fastpath", cl::init(true),
- cl::desc("Instrument fastpath"), cl::Hidden);
-static cl::opt<bool> ClAuxFieldInfo(
- "esan-aux-field-info", cl::init(true),
- cl::desc("Generate binary with auxiliary struct field information"),
- cl::Hidden);
-
-// Experiments show that the performance difference can be 2x or more,
-// and accuracy loss is typically negligible, so we turn this on by default.
-static cl::opt<bool> ClAssumeIntraCacheLine(
- "esan-assume-intra-cache-line", cl::init(true),
- cl::desc("Assume each memory access touches just one cache line, for "
- "better performance but with a potential loss of accuracy."),
- cl::Hidden);
-
-STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
-STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
-STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
-STATISTIC(NumAccessesWithIrregularSize,
- "Number of accesses with a size outside our targeted callout sizes");
-STATISTIC(NumIgnoredStructs, "Number of ignored structs");
-STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
-STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
-STATISTIC(NumAssumedIntraCacheLine,
- "Number of accesses assumed to be intra-cache-line");
-
-static const uint64_t EsanCtorAndDtorPriority = 0;
-static const char *const EsanModuleCtorName = "esan.module_ctor";
-static const char *const EsanModuleDtorName = "esan.module_dtor";
-static const char *const EsanInitName = "__esan_init";
-static const char *const EsanExitName = "__esan_exit";
-
-// We need to specify the tool to the runtime earlier than
-// the ctor is called in some cases, so we set a global variable.
-static const char *const EsanWhichToolName = "__esan_which_tool";
-
-// We must keep these Shadow* constants consistent with the esan runtime.
-// FIXME: Try to place these shadow constants, the names of the __esan_*
-// interface functions, and the ToolType enum into a header shared between
-// llvm and compiler-rt.
-struct ShadowMemoryParams {
- uint64_t ShadowMask;
- uint64_t ShadowOffs[3];
-};
-
-static const ShadowMemoryParams ShadowParams47 = {
- 0x00000fffffffffffull,
- {
- 0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
- }};
-
-static const ShadowMemoryParams ShadowParams40 = {
- 0x0fffffffffull,
- {
- 0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
- }};
-
-// This array is indexed by the ToolType enum.
-static const int ShadowScale[] = {
- 0, // ESAN_None.
- 2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
- 6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
-};
-
-// MaxStructCounterNameSize is a soft size limit to avoid insanely long
-// names for those extremely large structs.
-static const unsigned MaxStructCounterNameSize = 512;
-
-namespace {
-
-static EfficiencySanitizerOptions
-OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
- if (ClToolCacheFrag)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
- else if (ClToolWorkingSet)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
-
- // Direct opt invocation with no params will have the default ESAN_None.
- // We run the default tool in that case.
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
-
- return Options;
-}
-
-/// EfficiencySanitizer: instrument each module to find performance issues.
-class EfficiencySanitizer : public ModulePass {
-public:
- EfficiencySanitizer(
- const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
- : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnModule(Module &M) override;
- static char ID;
-
-private:
- bool initOnModule(Module &M);
- void initializeCallbacks(Module &M);
- bool shouldIgnoreStructType(StructType *StructTy);
- void createStructCounterName(
- StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
- void createCacheFragAuxGV(
- Module &M, const DataLayout &DL, StructType *StructTy,
- GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
- GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
- Constant *UnitName);
- Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
- void createDestructor(Module &M, Constant *ToolInfoArg);
- bool runOnFunction(Function &F, Module &M);
- bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
- bool instrumentMemIntrinsic(MemIntrinsic *MI);
- bool instrumentGetElementPtr(Instruction *I, Module &M);
- bool insertCounterUpdate(Instruction *I, StructType *StructTy,
- unsigned CounterIdx);
- unsigned getFieldCounterIdx(StructType *StructTy) {
- return 0;
- }
- unsigned getArrayCounterIdx(StructType *StructTy) {
- return StructTy->getNumElements();
- }
- unsigned getStructCounterSize(StructType *StructTy) {
- // The struct counter array includes:
- // - one counter for each struct field,
- // - one counter for the struct access within an array.
- return (StructTy->getNumElements()/*field*/ + 1/*array*/);
- }
- bool shouldIgnoreMemoryAccess(Instruction *I);
- int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
- Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
- Value *Addr, unsigned Alignment);
- // Each tool has its own fastpath routine:
- bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
- Value *Addr, unsigned Alignment);
- bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
- Value *Addr, unsigned Alignment);
-
- EfficiencySanitizerOptions Options;
- LLVMContext *Ctx;
- Type *IntptrTy;
- // Our slowpath involves callouts to the runtime library.
- // Access sizes are powers of two: 1, 2, 4, 8, 16.
- static const size_t NumberOfAccessSizes = 5;
- Function *EsanAlignedLoad[NumberOfAccessSizes];
- Function *EsanAlignedStore[NumberOfAccessSizes];
- Function *EsanUnalignedLoad[NumberOfAccessSizes];
- Function *EsanUnalignedStore[NumberOfAccessSizes];
- // For irregular sizes of any alignment:
- Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
- Function *MemmoveFn, *MemcpyFn, *MemsetFn;
- Function *EsanCtorFunction;
- Function *EsanDtorFunction;
- // Remember the counter variable for each struct type to avoid
- // recomputing the variable name later during instrumentation.
- std::map<Type *, GlobalVariable *> StructTyMap;
- ShadowMemoryParams ShadowParams;
-};
-} // namespace
-
-char EfficiencySanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(
- EfficiencySanitizer, "esan",
- "EfficiencySanitizer: finds performance issues.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- EfficiencySanitizer, "esan",
- "EfficiencySanitizer: finds performance issues.", false, false)
-
-StringRef EfficiencySanitizer::getPassName() const {
- return "EfficiencySanitizer";
-}
-
-void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-ModulePass *
-llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
- return new EfficiencySanitizer(Options);
-}
-
-void EfficiencySanitizer::initializeCallbacks(Module &M) {
- IRBuilder<> IRB(M.getContext());
- // Initialize the callbacks.
- for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
- const unsigned ByteSize = 1U << Idx;
- std::string ByteSizeStr = utostr(ByteSize);
- // We inline the load + store instrumentation for the most common cases
- // (aligned accesses of frequent sizes); these callouts are for the slowpath.
- SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
- EsanAlignedLoad[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
- EsanAlignedStore[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
- EsanUnalignedLoad[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
- EsanUnalignedStore[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- }
- EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemmoveFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemcpyFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemsetFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy));
-}
-
-bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
- if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
- return true;
- return false;
-}
-
-void EfficiencySanitizer::createStructCounterName(
- StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
- // Append NumFields and the field type ids to avoid conflicts between
- // structs that share a name but have different fields.
- if (StructTy->hasName())
- NameStr += StructTy->getName();
- else
- NameStr += "struct.anon";
- // The actual StructCounterName may grow beyond MaxStructCounterNameSize:
- // we always append $NumFields and at least one field type id before
- // enforcing the limit.
- // Append $NumFields.
- NameStr += "$";
- Twine(StructTy->getNumElements()).toVector(NameStr);
- // Append struct field type ids in the reverse order.
- for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
- NameStr += "$";
- Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
- if (NameStr.size() >= MaxStructCounterNameSize)
- break;
- }
- if (StructTy->isLiteral()) {
- // End with $ for literal struct.
- NameStr += "$";
- }
-}
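As a rough illustration of the naming scheme (a sketch only; the numeric type ids come from Type::getTypeID() and are deliberately left symbolic):

// %struct.Point = type { i32, i32, i8* }
//   NameStr ~= "struct.Point$3$<tid(i8*)>$<tid(i32)>$<tid(i32)>"
// Field type ids are appended in reverse field order; a literal (unnamed)
// struct additionally ends with a trailing '$'.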
-
-// Create global variables with auxiliary information (e.g., struct field size,
-// offset, and type name) for better user reports.
-void EfficiencySanitizer::createCacheFragAuxGV(
- Module &M, const DataLayout &DL, StructType *StructTy,
- GlobalVariable *&TypeName, GlobalVariable *&Offset,
- GlobalVariable *&Size) {
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- auto *Int32Ty = Type::getInt32Ty(*Ctx);
- // FieldTypeName.
- auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
- TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> TypeNameVec;
- // FieldOffset.
- auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
- Offset = new GlobalVariable(M, OffsetArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> OffsetVec;
- // FieldSize
- auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
- Size = new GlobalVariable(M, SizeArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> SizeVec;
- for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
- Type *Ty = StructTy->getElementType(i);
- std::string Str;
- raw_string_ostream StrOS(Str);
- Ty->print(StrOS);
- TypeNameVec.push_back(
- ConstantExpr::getPointerCast(
- createPrivateGlobalForString(M, StrOS.str(), true),
- Int8PtrTy));
- OffsetVec.push_back(
- ConstantInt::get(Int32Ty,
- DL.getStructLayout(StructTy)->getElementOffset(i)));
- SizeVec.push_back(ConstantInt::get(Int32Ty,
- DL.getTypeAllocSize(Ty)));
- }
- TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
- Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
- Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
-}
-
-// Create the global variable for the cache-fragmentation tool.
-GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
- Module &M, const DataLayout &DL, Constant *UnitName) {
- assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);
-
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
- auto *Int32Ty = Type::getInt32Ty(*Ctx);
- auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
- auto *Int64Ty = Type::getInt64Ty(*Ctx);
- auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- // This structure should be kept consistent with the StructInfo struct
- // in the runtime library.
- // struct StructInfo {
- // const char *StructName;
- // u32 Size;
- // u32 NumFields;
- // u32 *FieldOffset; // auxiliary struct field info.
- // u32 *FieldSize; // auxiliary struct field info.
- // const char **FieldTypeName; // auxiliary struct field info.
- // u64 *FieldCounters;
- // u64 *ArrayCounter;
- // };
- auto *StructInfoTy =
- StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
- Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
- auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
- // This structure should be kept consistent with the CacheFragInfo struct
- // in the runtime library.
- // struct CacheFragInfo {
- // const char *UnitName;
- // u32 NumStructs;
- // StructInfo *Structs;
- // };
- auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);
-
- std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
- unsigned NumStructs = 0;
- SmallVector<Constant *, 16> Initializers;
-
- for (auto &StructTy : Vec) {
- if (shouldIgnoreStructType(StructTy)) {
- ++NumIgnoredStructs;
- continue;
- }
- ++NumStructs;
-
- // StructName.
- SmallString<MaxStructCounterNameSize> CounterNameStr;
- createStructCounterName(StructTy, CounterNameStr);
- GlobalVariable *StructCounterName = createPrivateGlobalForString(
- M, CounterNameStr, /*AllowMerging*/true);
-
- // Counters.
- // We create the counter array with StructCounterName and weak linkage
- // so that counter arrays for structs with the same name and layout in
- // different compilation units are merged into one.
- auto *CounterArrayTy = ArrayType::get(Int64Ty,
- getStructCounterSize(StructTy));
- GlobalVariable *Counters =
- new GlobalVariable(M, CounterArrayTy, false,
- GlobalVariable::WeakAnyLinkage,
- ConstantAggregateZero::get(CounterArrayTy),
- CounterNameStr);
-
- // Remember the counter variable for each struct type.
- StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));
-
- // We pass the field type name array, offset array, and size array to
- // the runtime for better reporting.
- GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
- if (ClAuxFieldInfo)
- createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);
-
- Constant *FieldCounterIdx[2];
- FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
- FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
- getFieldCounterIdx(StructTy));
- Constant *ArrayCounterIdx[2];
- ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
- ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
- getArrayCounterIdx(StructTy));
- Initializers.push_back(ConstantStruct::get(
- StructInfoTy,
- ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
- ConstantInt::get(Int32Ty,
- DL.getStructLayout(StructTy)->getSizeInBytes()),
- ConstantInt::get(Int32Ty, StructTy->getNumElements()),
- Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
- : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
- Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
- : ConstantExpr::getPointerCast(Size, Int32PtrTy),
- TypeName == nullptr
- ? ConstantPointerNull::get(Int8PtrPtrTy)
- : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
- ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
- FieldCounterIdx),
- ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
- ArrayCounterIdx)));
- }
- // Structs.
- Constant *StructInfo;
- if (NumStructs == 0) {
- StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
- } else {
- auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
- StructInfo = ConstantExpr::getPointerCast(
- new GlobalVariable(M, StructInfoArrayTy, false,
- GlobalVariable::InternalLinkage,
- ConstantArray::get(StructInfoArrayTy, Initializers)),
- StructInfoPtrTy);
- }
-
- auto *CacheFragInfoGV = new GlobalVariable(
- M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
- ConstantStruct::get(CacheFragInfoTy, UnitName,
- ConstantInt::get(Int32Ty, NumStructs), StructInfo));
- return CacheFragInfoGV;
-}
-
-// Create the tool-specific argument passed to EsanInit and EsanExit.
-Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
- const DataLayout &DL) {
- // This structure contains tool-specific information about each compilation
- // unit (module) and is passed to the runtime library.
- GlobalVariable *ToolInfoGV = nullptr;
-
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- // Compilation unit name.
- auto *UnitName = ConstantExpr::getPointerCast(
- createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
- Int8PtrTy);
-
- // Create the tool-specific variable.
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
- ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);
-
- if (ToolInfoGV != nullptr)
- return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);
-
- // Create the null pointer if no tool-specific variable created.
- return ConstantPointerNull::get(Int8PtrTy);
-}
-
-void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
- PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
- false),
- GlobalValue::InternalLinkage,
- EsanModuleDtorName, &M);
- ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
- IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
- Function *EsanExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
- Int8PtrTy));
- EsanExit->setLinkage(Function::ExternalLinkage);
- IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
- appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
-}
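Roughly, the generated destructor looks like the following IR (a sketch; the actual symbol is whatever EsanModuleDtorName expands to, and the argument is the per-module tool info built above):

//   define internal void @<EsanModuleDtorName>() {
//   entry:
//     call void @__esan_exit(i8* <ToolInfoArg>)
//     ret void
//   }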
-
-bool EfficiencySanitizer::initOnModule(Module &M) {
-
- Triple TargetTriple(M.getTargetTriple());
- if (TargetTriple.isMIPS64())
- ShadowParams = ShadowParams40;
- else
- ShadowParams = ShadowParams47;
-
- Ctx = &M.getContext();
- const DataLayout &DL = M.getDataLayout();
- IRBuilder<> IRB(M.getContext());
- IntegerType *OrdTy = IRB.getInt32Ty();
- PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- IntptrTy = DL.getIntPtrType(M.getContext());
- // Create the variable passed to EsanInit and EsanExit.
- Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
- // Constructor
- // We specify the tool type both in the EsanWhichToolName global
- // and as an arg to the init routine as a sanity check.
- std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
- /*InitArgs=*/{
- ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
- ToolInfoArg});
- appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);
-
- createDestructor(M, ToolInfoArg);
-
- new GlobalVariable(M, OrdTy, true,
- GlobalValue::WeakAnyLinkage,
- ConstantInt::get(OrdTy,
- static_cast<int>(Options.ToolType)),
- EsanWhichToolName);
-
- return true;
-}
-
-Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
- // Shadow = ((App & Mask) + Offs) >> Scale
- Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
- uint64_t Offs;
- int Scale = ShadowScale[Options.ToolType];
- if (Scale <= 2)
- Offs = ShadowParams.ShadowOffs[Scale];
- else
- Offs = ShadowParams.ShadowOffs[0] << Scale;
- Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
- if (Scale > 0)
- Shadow = IRB.CreateLShr(Shadow, Scale);
- return Shadow;
-}
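A worked example of that mapping, using the 47-bit ShadowParams47 values and the cache-frag tool (the application address is illustrative):

// Cache-frag: Scale = 2, Mask = 0x00000fffffffffff, Offs = ShadowOffs[2] = 0x0000440000000000
//   appToShadow(0x1000) = ((0x1000 & Mask) + Offs) >> 2
//                       = 0x0000440000001000 >> 2
//                       = 0x0000110000000400   // four app bytes share one shadow byte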
-
-bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- // We'd like to know about cache fragmentation in vtable accesses and
- // constant data references, so we do not currently ignore anything.
- return false;
- } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
- // TODO: the instrumentation disturbs the data layout on the stack, so we
- // may want to add an option to ignore stack references (if we can
- // distinguish them) to reduce overhead.
- }
- // TODO(bruening): future tools will be returning true for some cases.
- return false;
-}
-
-bool EfficiencySanitizer::runOnModule(Module &M) {
- bool Res = initOnModule(M);
- initializeCallbacks(M);
- for (auto &F : M) {
- Res |= runOnFunction(F, M);
- }
- return Res;
-}
-
-bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
- // This is required to prevent instrumenting the call to __esan_init from
- // within the module constructor.
- if (&F == EsanCtorFunction)
- return false;
- SmallVector<Instruction *, 8> LoadsAndStores;
- SmallVector<Instruction *, 8> MemIntrinCalls;
- SmallVector<Instruction *, 8> GetElementPtrs;
- bool Res = false;
- const DataLayout &DL = M.getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- for (auto &BB : F) {
- for (auto &Inst : BB) {
- if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
- isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
- !shouldIgnoreMemoryAccess(&Inst))
- LoadsAndStores.push_back(&Inst);
- else if (isa<MemIntrinsic>(Inst))
- MemIntrinCalls.push_back(&Inst);
- else if (isa<GetElementPtrInst>(Inst))
- GetElementPtrs.push_back(&Inst);
- else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
- maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
- }
- }
-
- if (ClInstrumentLoadsAndStores) {
- for (auto Inst : LoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst, DL);
- }
- }
-
- if (ClInstrumentMemIntrinsics) {
- for (auto Inst : MemIntrinCalls) {
- Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
- }
- }
-
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- for (auto Inst : GetElementPtrs) {
- Res |= instrumentGetElementPtr(Inst, M);
- }
- }
-
- return Res;
-}
-
-bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
- const DataLayout &DL) {
- IRBuilder<> IRB(I);
- bool IsStore;
- Value *Addr;
- unsigned Alignment;
- if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
- IsStore = false;
- Alignment = Load->getAlignment();
- Addr = Load->getPointerOperand();
- } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
- IsStore = true;
- Alignment = Store->getAlignment();
- Addr = Store->getPointerOperand();
- } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- IsStore = true;
- Alignment = 0;
- Addr = RMW->getPointerOperand();
- } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
- IsStore = true;
- Alignment = 0;
- Addr = Xchg->getPointerOperand();
- } else
- llvm_unreachable("Unsupported mem access type");
-
- Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
- Value *OnAccessFunc = nullptr;
-
- // Convert 0 to the default alignment.
- if (Alignment == 0)
- Alignment = DL.getPrefTypeAlignment(OrigTy);
-
- if (IsStore)
- NumInstrumentedStores++;
- else
- NumInstrumentedLoads++;
- int Idx = getMemoryAccessFuncIndex(Addr, DL);
- if (Idx < 0) {
- OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
- IRB.CreateCall(OnAccessFunc,
- {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- ConstantInt::get(IntptrTy, TypeSizeBytes)});
- } else {
- if (ClInstrumentFastpath &&
- instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
- NumFastpaths++;
- return true;
- }
- if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
- OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
- else
- OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
- IRB.CreateCall(OnAccessFunc,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
- }
- return true;
-}
-
-// It's simplest to replace the memset/memmove/memcpy intrinsics with
-// calls that the runtime library intercepts.
-// Our pass is late enough that calls should not turn back into intrinsics.
-bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
- IRBuilder<> IRB(MI);
- bool Res = false;
- if (isa<MemSetInst>(MI)) {
- IRB.CreateCall(
- MemsetFn,
- {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
- MI->eraseFromParent();
- Res = true;
- } else if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall(
- isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
- {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
- MI->eraseFromParent();
- Res = true;
- } else
- llvm_unreachable("Unsupported mem intrinsic type");
- return Res;
-}
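For illustration (a sketch of the effect, not literal pass output), a memset intrinsic is rewritten roughly as follows so the runtime's interceptor sees an ordinary call:

//   before:  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i1 false)
//   after:   %r = call i8* @memset(i8* %p, i32 0, i64 %n)
// The intrinsic is then erased; memcpy and memmove are handled the same way.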
-
-bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
- GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
- bool Res = false;
- if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
- ++NumIgnoredGEPs;
- return false;
- }
- Type *SourceTy = GepInst->getSourceElementType();
- StructType *StructTy = nullptr;
- ConstantInt *Idx;
- // Check if GEP calculates address from a struct array.
- if (isa<StructType>(SourceTy)) {
- StructTy = cast<StructType>(SourceTy);
- Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
- if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
- !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
- Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
- }
- // Iterate over all indices (except the first and the last) of each GEP
- // instruction to handle possible nested struct field address calculations.
- for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
- SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
- GepInst->idx_begin() + i);
- Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
- unsigned CounterIdx = 0;
- if (isa<ArrayType>(Ty)) {
- ArrayType *ArrayTy = cast<ArrayType>(Ty);
- StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
- if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
- continue;
- // The last counter for struct array access.
- CounterIdx = getArrayCounterIdx(StructTy);
- } else if (isa<StructType>(Ty)) {
- StructTy = cast<StructType>(Ty);
- if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
- continue;
- // Get the StructTy's subfield index.
- Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
- assert(Idx->getSExtValue() >= 0 &&
- Idx->getSExtValue() < StructTy->getNumElements());
- CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
- }
- Res |= insertCounterUpdate(I, StructTy, CounterIdx);
- }
- if (Res)
- ++NumInstrumentedGEPs;
- else
- ++NumIgnoredGEPs;
- return Res;
-}
-
-bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
- StructType *StructTy,
- unsigned CounterIdx) {
- GlobalVariable *CounterArray = StructTyMap[StructTy];
- if (CounterArray == nullptr)
- return false;
- IRBuilder<> IRB(I);
- Constant *Indices[2];
- // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
- // http://llvm.org/docs/GetElementPtr.html.
- // The first index of the GEP instruction steps through the first operand,
- // i.e., the array itself.
- Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
- // The second index is the index within the array.
- Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
- Constant *Counter =
- ConstantExpr::getGetElementPtr(
- ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
- CounterArray, Indices);
- Value *Load = IRB.CreateLoad(Counter);
- IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
- Counter);
- return true;
-}
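The inserted counter update is a plain, non-atomic read-modify-write of one slot in the per-struct counter array, roughly (sketch):

//   %old = load i64, i64* getelementptr inbounds ([N x i64], [N x i64]* @<StructCounterName>, i32 0, i32 <CounterIdx>)
//   %new = add i64 %old, 1
//   store i64 %new, i64* getelementptr inbounds (...same operands...)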
-
-int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
- const DataLayout &DL) {
- Type *OrigPtrTy = Addr->getType();
- Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
- assert(OrigTy->isSized());
- // The size is always a multiple of 8.
- uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
- if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
- TypeSizeBytes != 8 && TypeSizeBytes != 16) {
- // Irregular sizes do not have per-size call targets.
- NumAccessesWithIrregularSize++;
- return -1;
- }
- size_t Idx = countTrailingZeros(TypeSizeBytes);
- assert(Idx < NumberOfAccessSizes);
- return Idx;
-}
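To make the index mapping concrete (the values follow directly from countTrailingZeros):

//   TypeSizeBytes : 1  2  4  8  16
//   Idx           : 0  1  2  3  4    -> selects EsanAlignedLoad[Idx], EsanAlignedStore[Idx], ...
//   any other size -> -1, handled via __esan_unaligned_loadN / __esan_unaligned_storeN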
-
-bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
- const DataLayout &DL, bool IsStore,
- Value *Addr, unsigned Alignment) {
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
- } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
- return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
- }
- return false;
-}
-
-bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
- const DataLayout &DL,
- Value *Addr,
- unsigned Alignment) {
- // Do nothing.
- return true; // Return true to avoid slowpath instrumentation.
-}
-
-bool EfficiencySanitizer::instrumentFastpathWorkingSet(
- Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
- assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
- IRBuilder<> IRB(I);
- Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
- // Bail to the slowpath if the access might touch multiple cache lines.
- // An access aligned to its size is guaranteed to be intra-cache-line.
- // getMemoryAccessFuncIndex has already ruled out a size larger than 16
- // and thus larger than a cache line for platforms this tool targets
- // (and our shadow memory setup assumes 64-byte cache lines).
- assert(TypeSize <= 128);
- if (!(TypeSize == 8 ||
- (Alignment % (TypeSize / 8)) == 0)) {
- if (ClAssumeIntraCacheLine)
- ++NumAssumedIntraCacheLine;
- else
- return false;
- }
-
- // We inline instrumentation to set the corresponding shadow bits for
- // each cache line touched by the application. Here we handle a single
- // load or store that we have already shown cannot touch more than one
- // cache line, so we simply update the shadow memory for that one line.
- // Our shadow memory model is fine with races when manipulating shadow values.
- // We generate the following code:
- //
- // const char BitMask = 0x81;
- // char *ShadowAddr = appToShadow(AppAddr);
- // if ((*ShadowAddr & BitMask) != BitMask)
- // *ShadowAddr |= BitMask;
- //
- Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *ShadowPtr = appToShadow(AddrPtr, IRB);
- Type *ShadowTy = IntegerType::get(*Ctx, 8U);
- Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
- // The bottom bit is used for the current sampling period's working set.
- // The top bit is used for the total working set. We set both on each
- // memory access, if they are not already set.
- Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B
-
- Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
- // The AND and CMP will be turned into a TEST instruction by the compiler.
- Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
- Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
- // FIXME: do I need to call SetCurrentDebugLocation?
- IRB.SetInsertPoint(CmpTerm);
- // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
- // which are used by the runtime library.
- Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
- IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
- IRB.SetInsertPoint(I);
-
- return true;
-}
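As a reading aid, the shadow byte consulted here can be pictured as follows (consistent with the comments above; the middle bits belong to the runtime):

//   shadow byte (one per 64-byte cache line, Scale = 6):
//     bit 7     : line touched at some point (total working set)
//     bits 6..1 : reserved for the runtime library, never cleared here
//     bit 0     : line touched in the current sampling period
//   ValueMask 0x81 tests and sets bits 7 and 0 only.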
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9af64ed332cd..59950ffc4e9a 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -1,9 +1,8 @@
//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,11 +102,11 @@ private:
std::vector<Regex> &Regexes);
// Get pointers to the functions in the runtime library.
- Constant *getStartFileFunc();
- Constant *getEmitFunctionFunc();
- Constant *getEmitArcsFunc();
- Constant *getSummaryInfoFunc();
- Constant *getEndFileFunc();
+ FunctionCallee getStartFileFunc();
+ FunctionCallee getEmitFunctionFunc();
+ FunctionCallee getEmitArcsFunc();
+ FunctionCallee getSummaryInfoFunc();
+ FunctionCallee getEndFileFunc();
// Add the function to write out all our counters to the global destructor
// list.
@@ -648,7 +647,7 @@ void GCOVProfiler::AddFlushBeforeForkAndExec() {
for (auto I : ForkAndExecs) {
IRBuilder<> Builder(I);
FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
- Constant *GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+ FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
Builder.CreateCall(GCOVFlush);
I->getParent()->splitBasicBlock(I);
}
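The hunks in this file all follow the same mechanical change: Module::getOrInsertFunction now returns a FunctionCallee, which bundles the FunctionType with the callee value and is accepted directly by IRBuilder::CreateCall. A minimal sketch of the idiom (the runtime hook name here is illustrative):

FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
FunctionCallee Hook = M->getOrInsertFunction("some_runtime_hook", FTy);
Builder.CreateCall(Hook);  // the call type comes from the FunctionCallee, no cast needed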
@@ -811,14 +810,14 @@ bool GCOVProfiler::emitProfileArcs() {
auto It = EdgeToCounter.find({Pred, &BB});
assert(It != EdgeToCounter.end());
const unsigned Edge = It->second;
- Value *EdgeCounter =
- BuilderForPhi.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
+ Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
Phi->addIncoming(EdgeCounter, Pred);
}
// Skip phis, landingpads.
IRBuilder<> Builder(&*BB.getFirstInsertionPt());
- Value *Count = Builder.CreateLoad(Phi);
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Phi);
@@ -827,9 +826,9 @@ bool GCOVProfiler::emitProfileArcs() {
auto It = EdgeToCounter.find({&BB, nullptr});
assert(It != EdgeToCounter.end());
const unsigned Edge = It->second;
- Value *Counter =
- Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
- Value *Count = Builder.CreateLoad(Counter);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
}
@@ -864,7 +863,7 @@ bool GCOVProfiler::emitProfileArcs() {
// Initialize the environment and register the local writeout and flush
// functions.
- Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
Builder.CreateRetVoid();
@@ -874,22 +873,21 @@ bool GCOVProfiler::emitProfileArcs() {
return Result;
}
-Constant *GCOVProfiler::getStartFileFunc() {
+FunctionCallee GCOVProfiler::getStartFileFunc() {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
Type::getInt8PtrTy(*Ctx), // const char version[4]
Type::getInt32Ty(*Ctx), // uint32_t checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false))
- FunRes->addParamAttr(2, AK);
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
return Res;
-
}
-Constant *GCOVProfiler::getEmitFunctionFunc() {
+FunctionCallee GCOVProfiler::getEmitFunctionFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
@@ -898,36 +896,34 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false)) {
- FunRes->addParamAttr(0, AK);
- FunRes->addParamAttr(2, AK);
- FunRes->addParamAttr(3, AK);
- FunRes->addParamAttr(4, AK);
- }
- return Res;
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false)) {
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ AL = AL.addParamAttribute(*Ctx, 3, AK);
+ AL = AL.addParamAttribute(*Ctx, 4, AK);
+ }
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
}
-Constant *GCOVProfiler::getEmitArcsFunc() {
+FunctionCallee GCOVProfiler::getEmitArcsFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false))
- FunRes->addParamAttr(0, AK);
- return Res;
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
}
-Constant *GCOVProfiler::getSummaryInfoFunc() {
+FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
}
-Constant *GCOVProfiler::getEndFileFunc() {
+FunctionCallee GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
}
@@ -947,11 +943,11 @@ Function *GCOVProfiler::insertCounterWriteout(
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
IRBuilder<> Builder(BB);
- Constant *StartFile = getStartFileFunc();
- Constant *EmitFunction = getEmitFunctionFunc();
- Constant *EmitArcs = getEmitArcsFunc();
- Constant *SummaryInfo = getSummaryInfoFunc();
- Constant *EndFile = getEndFileFunc();
+ FunctionCallee StartFile = getStartFileFunc();
+ FunctionCallee EmitFunction = getEmitFunctionFunc();
+ FunctionCallee EmitArcs = getEmitArcsFunc();
+ FunctionCallee SummaryInfo = getSummaryInfoFunc();
+ FunctionCallee EndFile = getEndFileFunc();
NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CUNodes) {
@@ -1088,22 +1084,32 @@ Function *GCOVProfiler::insertCounterWriteout(
PHINode *IV =
Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
IV->addIncoming(Builder.getInt32(0), BB);
- auto *FileInfoPtr =
- Builder.CreateInBoundsGEP(FileInfoArrayGV, {Builder.getInt32(0), IV});
- auto *StartFileCallArgsPtr = Builder.CreateStructGEP(FileInfoPtr, 0);
+ auto *FileInfoPtr = Builder.CreateInBoundsGEP(
+ FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
+ auto *StartFileCallArgsPtr =
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0);
auto *StartFileCall = Builder.CreateCall(
StartFile,
- {Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 1)),
- Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 2))});
+ {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 0)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 1)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 2))});
if (auto AK = TLI->getExtAttrForI32Param(false))
StartFileCall->addParamAttr(2, AK);
auto *NumCounters =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 1));
+ Builder.CreateLoad(FileInfoTy->getElementType(1),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1));
auto *EmitFunctionCallArgsArray =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 2));
+ Builder.CreateLoad(FileInfoTy->getElementType(2),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2));
auto *EmitArcsCallArgsArray =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 3));
+ Builder.CreateLoad(FileInfoTy->getElementType(3),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3));
auto *EnterCounterLoopCond =
Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
@@ -1111,16 +1117,26 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.SetInsertPoint(CounterLoopHeader);
auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
- auto *EmitFunctionCallArgsPtr =
- Builder.CreateInBoundsGEP(EmitFunctionCallArgsArray, {JV});
+ auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
+ EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
auto *EmitFunctionCall = Builder.CreateCall(
EmitFunction,
- {Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 1)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 2)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 3)),
- Builder.CreateLoad(
- Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 4))});
+ {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 1)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 2)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(3),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 3)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(4),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr,
+ 4))});
if (auto AK = TLI->getExtAttrForI32Param(false)) {
EmitFunctionCall->addParamAttr(0, AK);
EmitFunctionCall->addParamAttr(2, AK);
@@ -1128,11 +1144,15 @@ Function *GCOVProfiler::insertCounterWriteout(
EmitFunctionCall->addParamAttr(4, AK);
}
auto *EmitArcsCallArgsPtr =
- Builder.CreateInBoundsGEP(EmitArcsCallArgsArray, {JV});
+ Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
auto *EmitArcsCall = Builder.CreateCall(
EmitArcs,
- {Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 1))});
+ {Builder.CreateLoad(
+ EmitArcsCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy,
+ EmitArcsCallArgsPtr, 1))});
if (auto AK = TLI->getExtAttrForI32Param(false))
EmitArcsCall->addParamAttr(0, AK);
auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
@@ -1172,7 +1192,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
// Write out the current counters.
- Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
assert(WriteoutF && "Need to create the writeout function first!");
IRBuilder<> Builder(Entry);
diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index d04c2b76288f..90a9f4955a4b 100644
--- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1,9 +1,8 @@
//===- HWAddressSanitizer.cpp - detector of addressability bugs -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
/// based on tagged addressing.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -21,6 +21,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -125,10 +126,10 @@ static cl::opt<bool> ClEnableKhwasan(
// is accessed. The shadow mapping looks like:
// Shadow = (Mem >> scale) + offset
-static cl::opt<unsigned long long> ClMappingOffset(
- "hwasan-mapping-offset",
- cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
- cl::init(0));
+static cl::opt<uint64_t>
+ ClMappingOffset("hwasan-mapping-offset",
+ cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
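A quick illustration of that mapping (a sketch; a scale of 4 means one shadow byte per 16-byte granule, and the offsets shown are hypothetical):

//   Shadow = (Mem >> Scale) + Offset
//   Scale = 4, Offset = 0:      Mem 0x1230 -> shadow byte at 0x123
//   Scale = 4, Offset = 0x4000: Mem 0x1230 -> shadow byte at 0x4123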
static cl::opt<bool>
ClWithIfunc("hwasan-with-ifunc",
@@ -148,42 +149,46 @@ static cl::opt<bool>
"in a thread-local ring buffer"),
cl::Hidden, cl::init(true));
static cl::opt<bool>
- ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
- cl::desc("create static frame descriptions"),
- cl::Hidden, cl::init(true));
-
-static cl::opt<bool>
ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
cl::desc("instrument memory intrinsics"),
cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentLandingPads("hwasan-instrument-landing-pads",
+ cl::desc("instrument landing pads"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
+ cl::desc("inline all checks"),
+ cl::Hidden, cl::init(false));
+
namespace {
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
-class HWAddressSanitizer : public FunctionPass {
+class HWAddressSanitizer {
public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- explicit HWAddressSanitizer(bool CompileKernel = false, bool Recover = false)
- : FunctionPass(ID) {
+ explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
+ bool Recover = false) {
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
ClEnableKhwasan : CompileKernel;
- }
- StringRef getPassName() const override { return "HWAddressSanitizer"; }
+ initializeModule(M);
+ }
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
+ bool sanitizeFunction(Function &F);
+ void initializeModule(Module &M);
void initializeCallbacks(Module &M);
+ Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
void untagPointerOperand(Instruction *I, Value *Addr);
- Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
- void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+ Value *shadowBase();
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
void instrumentMemIntrinsic(MemIntrinsic *MI);
@@ -193,11 +198,15 @@ public:
Value **MaybeMask);
bool isInterestingAlloca(const AllocaInst &AI);
- bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag);
+ bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
- bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ bool instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ Value *readRegister(IRBuilder<> &IRB, StringRef Name);
+ bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
@@ -205,31 +214,14 @@ public:
Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
- Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+ void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
private:
LLVMContext *C;
std::string CurModuleUniqueId;
Triple TargetTriple;
- Function *HWAsanMemmove, *HWAsanMemcpy, *HWAsanMemset;
-
- // Frame description is a way to pass names/sizes of local variables
- // to the run-time w/o adding extra executable code in every function.
- // We do this by creating a separate section with {PC,Descr} pairs and passing
- // the section beg/end to __hwasan_init_frames() at module init time.
- std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
- void createFrameGlobal(Function &F, const std::string &FrameString);
- // Get the section name for frame descriptions. Currently ELF-only.
- const char *getFrameSection() { return "__hwasan_frames"; }
- const char *getFrameSectionBeg() { return "__start___hwasan_frames"; }
- const char *getFrameSectionEnd() { return "__stop___hwasan_frames"; }
- GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
- const char *Name) {
- auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, Name);
- GV->setVisibility(GlobalValue::HiddenVisibility);
- return GV;
- }
+ FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
+ FunctionCallee HWAsanHandleVfork;
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
@@ -253,48 +245,95 @@ private:
Type *IntptrTy;
Type *Int8PtrTy;
Type *Int8Ty;
+ Type *Int32Ty;
bool CompileKernel;
bool Recover;
Function *HwasanCtorFunction;
- Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
- Function *HwasanMemoryAccessCallbackSized[2];
+ FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+ FunctionCallee HwasanMemoryAccessCallbackSized[2];
- Function *HwasanTagMemoryFunc;
- Function *HwasanGenerateTagFunc;
- Function *HwasanThreadEnterFunc;
+ FunctionCallee HwasanTagMemoryFunc;
+ FunctionCallee HwasanGenerateTagFunc;
+ FunctionCallee HwasanThreadEnterFunc;
Constant *ShadowGlobal;
Value *LocalDynamicShadow = nullptr;
+ Value *StackBaseTag = nullptr;
GlobalValue *ThreadPtrGlobal = nullptr;
};
+class HWAddressSanitizerLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid.
+ static char ID;
+
+ explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {}
+
+ StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+ bool doInitialization(Module &M) override {
+ HWASan = llvm::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+ return true;
+ }
+
+ bool runOnFunction(Function &F) override {
+ return HWASan->sanitizeFunction(F);
+ }
+
+ bool doFinalization(Module &M) override {
+ HWASan.reset();
+ return false;
+ }
+
+private:
+ std::unique_ptr<HWAddressSanitizer> HWASan;
+ bool CompileKernel;
+ bool Recover;
+};
+
} // end anonymous namespace
-char HWAddressSanitizer::ID = 0;
+char HWAddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- HWAddressSanitizer, "hwasan",
+ HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
INITIALIZE_PASS_END(
- HWAddressSanitizer, "hwasan",
+ HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
-FunctionPass *llvm::createHWAddressSanitizerPass(bool CompileKernel,
- bool Recover) {
+FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
+ bool Recover) {
assert(!CompileKernel || Recover);
- return new HWAddressSanitizer(CompileKernel, Recover);
+ return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
+}
+
+HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
+ : CompileKernel(CompileKernel), Recover(Recover) {}
+
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+ bool Modified = false;
+ for (Function &F : M)
+ Modified |= HWASan.sanitizeFunction(F);
+ if (Modified)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
/// Module-level initialization.
///
/// inserts a call to __hwasan_init to the module's constructor list.
-bool HWAddressSanitizer::doInitialization(Module &M) {
+void HWAddressSanitizer::initializeModule(Module &M) {
LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
auto &DL = M.getDataLayout();
@@ -308,47 +347,35 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
IntptrTy = IRB.getIntPtrTy(DL);
Int8PtrTy = IRB.getInt8PtrTy();
Int8Ty = IRB.getInt8Ty();
+ Int32Ty = IRB.getInt32Ty();
HwasanCtorFunction = nullptr;
if (!CompileKernel) {
std::tie(HwasanCtorFunction, std::ignore) =
- createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName,
- kHwasanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
- Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
- HwasanCtorFunction->setComdat(CtorComdat);
- appendToGlobalCtors(M, HwasanCtorFunction, 0, HwasanCtorFunction);
-
- // Create a zero-length global in __hwasan_frame so that the linker will
- // always create start and stop symbols.
- //
- // N.B. If we ever start creating associated metadata in this pass this
- // global will need to be associated with the ctor.
- Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
- auto GV =
- new GlobalVariable(M, Int8Arr0Ty, /*isConstantGlobal*/ true,
- GlobalVariable::PrivateLinkage,
- Constant::getNullValue(Int8Arr0Ty), "__hwasan");
- GV->setSection(getFrameSection());
- GV->setComdat(CtorComdat);
- appendToCompilerUsed(M, GV);
-
- IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
- IRBCtor.CreateCall(
- declareSanitizerInitFunction(M, "__hwasan_init_frames",
- {Int8PtrTy, Int8PtrTy}),
- {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
- createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kHwasanModuleCtorName, kHwasanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ Ctor->setComdat(CtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
}
- if (!TargetTriple.isAndroid())
- appendToCompilerUsed(
- M, ThreadPtrGlobal = new GlobalVariable(
- M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));
-
- return true;
+ if (!TargetTriple.isAndroid()) {
+ Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
+ auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
+ GlobalValue::ExternalLinkage, nullptr,
+ "__hwasan_tls", nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ appendToCompilerUsed(M, GV);
+ return GV;
+ });
+ ThreadPtrGlobal = cast<GlobalVariable>(C);
+ }
}
void HWAddressSanitizer::initializeCallbacks(Module &M) {
@@ -357,44 +384,55 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string EndingStr = Recover ? "_noabort" : "";
- HwasanMemoryAccessCallbackSized[AccessIsWrite] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));
+ HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + TypeStr +
itostr(1ULL << AccessSizeIndex) + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
}
}
- HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
- HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));
+ HwasanTagMemoryFunc = M.getOrInsertFunction(
+ "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+ HwasanGenerateTagFunc =
+ M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
- if (Mapping.InGlobal)
- ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
- ArrayType::get(IRB.getInt8Ty(), 0));
+ ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
+ ArrayType::get(IRB.getInt8Ty(), 0));
const std::string MemIntrinCallbackPrefix =
CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
- HWAsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- HWAsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- HWAsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
- HwasanThreadEnterFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy()));
+ HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ HWAsanHandleVfork =
+ M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+
+ HwasanThreadEnterFunc =
+ M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
+}
+
+Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+ // An empty inline asm with input reg == output reg.
+ // An opaque no-op cast, basically.
+ InlineAsm *Asm = InlineAsm::get(
+ FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
+ StringRef(""), StringRef("=r,0"),
+ /*hasSideEffects=*/false);
+ return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
}
Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
@@ -403,18 +441,12 @@ Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
return nullptr;
if (Mapping.InGlobal) {
- // An empty inline asm with input reg == output reg.
- // An opaque pointer-to-int cast, basically.
- InlineAsm *Asm = InlineAsm::get(
- FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
- StringRef(""), StringRef("=r,0"),
- /*hasSideEffects=*/false);
- return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+ return getDynamicShadowIfunc(IRB);
} else {
Value *GlobalDynamicAddress =
IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
- kHwasanShadowMemoryDynamicAddress, IntptrTy);
- return IRB.CreateLoad(GlobalDynamicAddress);
+ kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
+ return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
}
}
@@ -506,29 +538,44 @@ void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}
-Value *HWAddressSanitizer::memToShadow(Value *Mem, Type *Ty, IRBuilder<> &IRB) {
+Value *HWAddressSanitizer::shadowBase() {
+ if (LocalDynamicShadow)
+ return LocalDynamicShadow;
+ return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
+ Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
// Mem >> Scale
Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
if (Mapping.Offset == 0)
- return Shadow;
+ return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
// (Mem >> Scale) + Offset
- Value *ShadowBase;
- if (LocalDynamicShadow)
- ShadowBase = LocalDynamicShadow;
- else
- ShadowBase = ConstantInt::get(Ty, Mapping.Offset);
- return IRB.CreateAdd(Shadow, ShadowBase);
+ return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
}
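The mapping computed above is plain pointer arithmetic: one shadow byte per 2^Scale bytes of application memory, offset by the shadow base. A small stand-alone sketch with assumed parameters (Scale = 4 for 16-byte granules, and a made-up constant Offset; the real values come from ShadowMapping):

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned Scale = 4;                       // 16-byte granules (assumed)
  const uint64_t Offset = 0x4000000000ULL;        // hypothetical constant shadow base
  const uint64_t Mem = 0x7ffe12345678ULL;         // an untagged application address
  const uint64_t Shadow = (Mem >> Scale) + Offset; // same (Mem >> Scale) + Offset form
  std::printf("shadow byte for 0x%llx is at 0x%llx\n",
              (unsigned long long)Mem, (unsigned long long)Shadow);
  return 0;
}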
-void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore) {
+ const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
IRBuilder<> IRB(InsertBefore);
+
+ if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
+ TargetTriple.isOSBinFormatELF() && !Recover) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+ IRB.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess),
+ {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+ return;
+ }
+
+ Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
IRB.getInt8Ty());
Value *AddrLong = untagPointer(IRB, PtrLong);
- Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
- Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
+ Value *Shadow = memToShadow(AddrLong, IRB);
+ Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
@@ -540,11 +587,35 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
}
Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
+ SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(CheckTerm);
- const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
+ Value *OutOfShortGranuleTagRange =
+ IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
+ Instruction *CheckFailTerm =
+ SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
+ MDBuilder(*C).createBranchWeights(1, 100000));
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+ PtrLowBits = IRB.CreateAdd(
+ PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
+ Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
+ SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
+ InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+ Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
+ Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
+ SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckFailTerm);
InlineAsm *Asm;
switch (TargetTriple.getArch()) {
case Triple::x86_64:
@@ -568,6 +639,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
report_fatal_error("unsupported architecture");
}
IRB.CreateCall(Asm, PtrLong);
+ if (Recover)
+ cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
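A plain-C++ restatement of the check sequence built above may help when reading the IR-building code. It assumes the conventions described in this file: 16-byte granules, shadow values 1..15 marking short granules, and the real tag of a short granule stored in the granule's last byte. Reading aid only, not the pass's code:

#include <cstdint>

// Returns true if the access passes the inline check, false if it would trap.
bool passesInlineCheck(uint8_t PtrTag, uint8_t MemTag, uint64_t PtrLong,
                       unsigned AccessSize, uint8_t GranuleLastByte) {
  if (PtrTag == MemTag)
    return true;                            // tags match: fast path
  if (MemTag > 15)
    return false;                           // genuine tag mismatch
  // Short granule: MemTag is the number of valid bytes in this 16-byte granule.
  if ((PtrLong & 15) + AccessSize - 1 >= MemTag)
    return false;                           // access overruns the valid bytes
  return PtrTag == GranuleLastByte;         // real tag lives in the granule's last byte
}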
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
@@ -610,7 +683,6 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
return false; //FIXME
IRBuilder<> IRB(I);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (isPowerOf2_64(TypeSize) &&
(TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
(Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
@@ -618,13 +690,14 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
- AddrLong);
+ IRB.CreatePointerCast(Addr, IntptrTy));
} else {
- instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I);
+ instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I);
}
} else {
IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
- {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8)});
}
untagPointerOperand(I, Addr);
@@ -644,27 +717,33 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
}
bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
- Value *Tag) {
- size_t Size = (getAllocaSizeInBytes(*AI) + Mapping.getAllocaAlignment() - 1) &
- ~(Mapping.getAllocaAlignment() - 1);
+ Value *Tag, size_t Size) {
+ size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
{IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
- ConstantInt::get(IntptrTy, Size)});
+ ConstantInt::get(IntptrTy, AlignedSize)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
- Value *ShadowPtr = IRB.CreateIntToPtr(
- memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
- Int8PtrTy);
+ Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
// If this memset is not inlined, it will be intercepted in the hwasan
// runtime library. That's OK, because the interceptor skips the checks if
// the address is in the shadow region.
// FIXME: the interceptor is not as fast as real memset. Consider lowering
// llvm.memset right here into either a sequence of stores, or a call to
// hwasan_tag_memory.
- IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ if (ShadowSize)
+ IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ if (Size != AlignedSize) {
+ IRB.CreateStore(
+ ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()),
+ IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
+ IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
+ Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
+ AlignedSize - 1));
+ }
}
return true;
}
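Worked example of the short-granule encoding emitted above, under the assumed 16-byte alloca alignment: a 20-byte alloca with tag 0xA gets one full granule tagged 0xA, a trailing shadow byte holding 4 (the number of live bytes in the last granule), and the real tag stored in the last byte of the padded object.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const size_t Size = 20, Granule = 16, Scale = 4;              // assumed values
  const size_t AlignedSize = (Size + Granule - 1) & ~(Granule - 1); // 32
  const uint8_t Tag = 0xA;

  uint8_t Shadow[2] = {};                    // AlignedSize >> Scale == 2 shadow bytes
  std::memset(Shadow, Tag, Size >> Scale);   // full granules: Shadow[0] = 0xA
  Shadow[Size >> Scale] = Size % Granule;    // short granule: Shadow[1] = 4

  uint8_t Object[32] = {};
  Object[AlignedSize - 1] = Tag;             // real tag kept in the granule's last byte

  std::printf("shadow = {0x%x, 0x%x}, last object byte = 0x%x\n",
              (unsigned)Shadow[0], (unsigned)Shadow[1],
              (unsigned)Object[AlignedSize - 1]);
  return 0;
}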
@@ -674,10 +753,16 @@ static unsigned RetagMask(unsigned AllocaNo) {
// x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
// masks.
// The list does not include the value 255, which is used for UAR.
- static unsigned FastMasks[] = {
- 0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24,
- 28, 30, 31, 32, 48, 56, 60, 62, 63, 64, 96, 112, 120,
- 124, 126, 127, 128, 192, 224, 240, 248, 252, 254};
+ //
+ // Because we are more likely to use earlier elements of this list than later
+ // ones, it is sorted in increasing order of probability of collision with a
+ // mask allocated (temporally) nearby. The program that generated this list
+ // can be found at:
+ // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
+ static unsigned FastMasks[] = {0, 128, 64, 192, 32, 96, 224, 112, 240,
+ 48, 16, 120, 248, 56, 24, 8, 124, 252,
+ 60, 28, 12, 4, 126, 254, 62, 30, 14,
+ 6, 2, 127, 63, 31, 15, 7, 3, 1};
return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
}
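As the comment notes, applying one of these masks is a single XOR of the pointer's tag byte; in scalar form (values illustrative, not the pass's code):

#include <cstdint>

// Flip bits of the tag byte (bits 56..63) with one of the fast masks; on
// AArch64 this corresponds to a single EOR with an immediate operand.
uint64_t retag(uint64_t TaggedPtr, unsigned Mask) {
  return TaggedPtr ^ (uint64_t(Mask) << 56);
}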
@@ -688,6 +773,8 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
if (ClGenerateTagsWithCalls)
return getNextTagWithCall(IRB);
+ if (StackBaseTag)
+ return StackBaseTag;
// FIXME: use addressofreturnaddress (but implement it in aarch64 backend
// first).
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -763,7 +850,8 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
Value *SlotPtr = IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x30),
+ IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), 0x30),
Ty->getPointerTo(0));
return SlotPtr;
}
@@ -774,45 +862,21 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
return nullptr;
}
-// Creates a string with a description of the stack frame (set of Allocas).
-// The string is intended to be human readable.
-// The current form is: Size1 Name1; Size2 Name2; ...
-std::string
-HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
- std::ostringstream Descr;
- for (auto AI : Allocas)
- Descr << getAllocaSizeInBytes(*AI) << " " << AI->getName().str() << "; ";
- return Descr.str();
-}
-
-// Creates a global in the frame section which consists of two pointers:
-// the function PC and the frame string constant.
-void HWAddressSanitizer::createFrameGlobal(Function &F,
- const std::string &FrameString) {
- Module &M = *F.getParent();
- auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
- auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
- auto GV = new GlobalVariable(
- M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
- ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
- "__hwasan");
- GV->setSection(getFrameSection());
- appendToCompilerUsed(M, GV);
- // Put GV into the F's Comadat so that if F is deleted GV can be deleted too.
- if (auto Comdat =
- GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
- GV->setComdat(Comdat);
-}
+void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
+ if (!Mapping.InTls) {
+ LocalDynamicShadow = getDynamicShadowNonTls(IRB);
+ return;
+ }
-Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
- bool WithFrameRecord) {
- if (!Mapping.InTls)
- return getDynamicShadowNonTls(IRB);
+ if (!WithFrameRecord && TargetTriple.isAndroid()) {
+ LocalDynamicShadow = getDynamicShadowIfunc(IRB);
+ return;
+ }
Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
assert(SlotPtr);
- Instruction *ThreadLong = IRB.CreateLoad(SlotPtr);
+ Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
Function *F = IRB.GetInsertBlock()->getParent();
if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
@@ -826,7 +890,7 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
// FIXME: This should call a new runtime function with a custom calling
// convention to avoid needing to spill all arguments here.
IRB.CreateCall(HwasanThreadEnterFunc);
- LoadInst *ReloadThreadLong = IRB.CreateLoad(SlotPtr);
+ LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
@@ -840,15 +904,21 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
if (WithFrameRecord) {
+ StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
// Prepare ring buffer data.
- auto PC = IRB.CreatePtrToInt(F, IntptrTy);
+ Value *PC;
+ if (TargetTriple.getArch() == Triple::aarch64)
+ PC = readRegister(IRB, "pc");
+ else
+ PC = IRB.CreatePtrToInt(F, IntptrTy);
auto GetStackPointerFn =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
Value *SP = IRB.CreatePtrToInt(
IRB.CreateCall(GetStackPointerFn,
{Constant::getNullValue(IRB.getInt32Ty())}),
IntptrTy);
- // Mix SP and PC. TODO: also add the tag to the mix.
+ // Mix SP and PC.
// Assumptions:
// PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
// SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
@@ -879,16 +949,38 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
// Get shadow base address by aligning RecordPtr up.
// Note: this is not correct if the pointer is already aligned.
// Runtime library will make sure this never happens.
- Value *ShadowBase = IRB.CreateAdd(
+ LocalDynamicShadow = IRB.CreateAdd(
IRB.CreateOr(
ThreadLongMaybeUntagged,
ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
- return ShadowBase;
+ LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
+}
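The OR-then-add-one idiom at the end of the prologue is an "align up": setting the low (1 << kShadowBaseAlignment) - 1 bits and adding 1 rounds the value up to the next aligned boundary (and, as the comment above says, overshoots if it was already aligned, which the runtime rules out). A numeric check with an assumed alignment of 2^32:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned kAlign = 32;                    // assumed kShadowBaseAlignment
  const uint64_t ThreadLong = 0x0000007123456789ULL;
  const uint64_t ShadowBase = (ThreadLong | ((1ULL << kAlign) - 1)) + 1;
  std::printf("0x%llx rounds up to 0x%llx\n",
              (unsigned long long)ThreadLong, (unsigned long long)ShadowBase);
  return 0;
}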
+
+Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ReadRegister =
+ Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
+ MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
+ Value *Args[] = {MetadataAsValue::get(*C, MD)};
+ return IRB.CreateCall(ReadRegister, Args);
+}
+
+bool HWAddressSanitizer::instrumentLandingPads(
+ SmallVectorImpl<Instruction *> &LandingPadVec) {
+ for (auto *LP : LandingPadVec) {
+ IRBuilder<> IRB(LP->getNextNode());
+ IRB.CreateCall(
+ HWAsanHandleVfork,
+ {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
+ : "sp")});
+ }
+ return true;
}
bool HWAddressSanitizer::instrumentStack(
SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
@@ -913,14 +1005,22 @@ bool HWAddressSanitizer::instrumentStack(
U.set(Replacement);
}
- tagAlloca(IRB, AI, Tag);
+ for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
+ DIExpression *OldExpr = DDI->getExpression();
+ DIExpression *NewExpr = DIExpression::append(
+ OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
+ DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+ }
+
+ size_t Size = getAllocaSizeInBytes(*AI);
+ tagAlloca(IRB, AI, Tag, Size);
for (auto RI : RetVec) {
IRB.SetInsertPoint(RI);
// Re-tag alloca memory with the special UAR tag.
Value *Tag = getUARTag(IRB, StackTag);
- tagAlloca(IRB, AI, Tag);
+ tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment()));
}
}
@@ -943,7 +1043,7 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
!AI.isSwiftError());
}
-bool HWAddressSanitizer::runOnFunction(Function &F) {
+bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (&F == HwasanCtorFunction)
return false;
@@ -955,15 +1055,12 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<AllocaInst*, 8> AllocasToInstrument;
SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> LandingPadVec;
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
for (auto &BB : F) {
for (auto &Inst : BB) {
if (ClInstrumentStack)
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
- // Realign all allocas. We don't want small uninteresting allocas to
- // hide in instrumented alloca's padding.
- if (AI->getAlignment() < Mapping.getAllocaAlignment())
- AI->setAlignment(Mapping.getAllocaAlignment());
- // Instrument some of them.
if (isInterestingAlloca(*AI))
AllocasToInstrument.push_back(AI);
continue;
@@ -973,6 +1070,13 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
isa<CleanupReturnInst>(Inst))
RetVec.push_back(&Inst);
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
+ if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
+ AllocaDeclareMap[Alloca].push_back(DDI);
+
+ if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
+ LandingPadVec.push_back(&Inst);
+
Value *MaybeMask = nullptr;
bool IsWrite;
unsigned Alignment;
@@ -984,33 +1088,93 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
}
- if (AllocasToInstrument.empty() && ToInstrument.empty())
- return false;
+ initializeCallbacks(*F.getParent());
- if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
- createFrameGlobal(F, createFrameString(AllocasToInstrument));
+ if (!LandingPadVec.empty())
+ instrumentLandingPads(LandingPadVec);
- initializeCallbacks(*F.getParent());
+ if (AllocasToInstrument.empty() && ToInstrument.empty())
+ return false;
assert(!LocalDynamicShadow);
Instruction *InsertPt = &*F.getEntryBlock().begin();
IRBuilder<> EntryIRB(InsertPt);
- LocalDynamicShadow = emitPrologue(EntryIRB,
- /*WithFrameRecord*/ ClRecordStackHistory &&
- !AllocasToInstrument.empty());
+ emitPrologue(EntryIRB,
+ /*WithFrameRecord*/ ClRecordStackHistory &&
+ !AllocasToInstrument.empty());
bool Changed = false;
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
+ Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+ StackTag);
+ }
+
+ // Pad and align each of the allocas that we instrumented to stop small
+  // uninteresting allocas from hiding in an instrumented alloca's padding and so
+ // that we have enough space to store real tags for short granules.
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+ for (AllocaInst *AI : AllocasToInstrument) {
+ uint64_t Size = getAllocaSizeInBytes(*AI);
+ uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
+ AI->setAlignment(std::max(AI->getAlignment(), 16u));
+ if (Size != AlignedSize) {
+ Type *AllocatedType = AI->getAllocatedType();
+ if (AI->isArrayAllocation()) {
+ uint64_t ArraySize =
+ cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+ AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+ }
+ Type *TypeWithPadding = StructType::get(
+ AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+ NewAI->takeName(AI);
+ NewAI->setAlignment(AI->getAlignment());
+ NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(AI->isSwiftError());
+ NewAI->copyMetadata(*AI);
+ auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+ AI->replaceAllUsesWith(Bitcast);
+ AllocaToPaddedAllocaMap[AI] = NewAI;
+ }
+ }
+
+ if (!AllocaToPaddedAllocaMap.empty()) {
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
+ if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
+ DVI->setArgOperand(
+ 0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
+ for (auto &P : AllocaToPaddedAllocaMap)
+ P.first->eraseFromParent();
+ }
+
+ // If we split the entry block, move any allocas that were originally in the
+ // entry block back into the entry block so that they aren't treated as
+ // dynamic allocas.
+ if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
+ InsertPt = &*F.getEntryBlock().begin();
+ for (auto II = EntryIRB.GetInsertBlock()->begin(),
+ IE = EntryIRB.GetInsertBlock()->end();
+ II != IE;) {
+ Instruction *I = &*II++;
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ I->moveBefore(InsertPt);
+ }
}
for (auto Inst : ToInstrument)
Changed |= instrumentMemAccess(Inst);
LocalDynamicShadow = nullptr;
+ StackBaseTag = nullptr;
return Changed;
}
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 58436c8560ad..c7371f567ff3 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -1,9 +1,8 @@
//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -239,7 +238,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
<< " Target_func: " << Target << "\n");
- if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
+ if (ICPInvokeOnly && isa<CallInst>(Inst)) {
LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -247,7 +246,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
});
break;
}
- if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
+ if (ICPCallOnly && isa<InvokeInst>(Inst)) {
LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -311,10 +310,10 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst,
promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);
if (AttachProfToDirectCall) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(Count);
MDBuilder MDB(NewInst->getContext());
- NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ NewInst->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
}
using namespace ore;
@@ -394,9 +393,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
}
bool Changed = false;
for (auto &F : M) {
- if (F.isDeclaration())
- continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.isDeclaration() || F.hasOptNone())
continue;
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
new file mode 100644
index 000000000000..a2c1ddfd279e
--- /dev/null
+++ b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -0,0 +1,211 @@
+//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+#define DEBUG_TYPE "instrorderfile"
+
+static cl::opt<std::string> ClOrderFileWriteMapping(
+ "orderfile-write-mapping", cl::init(""),
+ cl::desc(
+ "Dump functions and their MD5 hash to deobfuscate profile data"),
+ cl::Hidden);
+
+namespace {
+
+// We need a global bitmap to tell if a function is executed. We also
+// need a global variable to save the order of functions. We can use a
+// fixed-size buffer that saves the MD5 hash of the function. We need
+// a global variable to save the index into the buffer.
+
+std::mutex MappingMutex;
+
+struct InstrOrderFile {
+private:
+ GlobalVariable *OrderFileBuffer;
+ GlobalVariable *BufferIdx;
+ GlobalVariable *BitMap;
+ ArrayType *BufferTy;
+ ArrayType *MapTy;
+
+public:
+ InstrOrderFile() {}
+
+ void createOrderFileData(Module &M) {
+ LLVMContext &Ctx = M.getContext();
+ int NumFunctions = 0;
+ for (Function &F : M) {
+ if (!F.isDeclaration())
+ NumFunctions++;
+ }
+
+ BufferTy =
+ ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
+ Type *IdxTy = Type::getInt32Ty(Ctx);
+ MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
+
+ // Create the global variables.
+ std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
+ OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(BufferTy), SymbolName);
+ Triple TT = Triple(M.getTargetTriple());
+ OrderFileBuffer->setSection(
+ getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
+
+ std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
+ BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(IdxTy), IndexName);
+
+ std::string BitMapName = "bitmap_0";
+ BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(MapTy), BitMapName);
+ }
+
+ // Generate the code sequence in the entry block of each function to
+ // update the buffer.
+ void generateCodeSequence(Module &M, Function &F, int FuncId) {
+ if (!ClOrderFileWriteMapping.empty()) {
+ std::lock_guard<std::mutex> LogLock(MappingMutex);
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append);
+ if (EC) {
+ report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
+ " to save mapping file for order file instrumentation\n");
+ } else {
+ std::stringstream stream;
+ stream << std::hex << MD5Hash(F.getName());
+ std::string singleLine = "MD5 " + stream.str() + " " +
+ std::string(F.getName()) + '\n';
+ OS << singleLine;
+ }
+ }
+
+ BasicBlock *OrigEntry = &F.getEntryBlock();
+
+ LLVMContext &Ctx = M.getContext();
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+
+ // Create a new entry block for instrumentation. We will check the bitmap
+ // in this basic block.
+ BasicBlock *NewEntry =
+ BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
+ IRBuilder<> entryB(NewEntry);
+ // Create a basic block for updating the circular buffer.
+ BasicBlock *UpdateOrderFileBB =
+ BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
+ IRBuilder<> updateB(UpdateOrderFileBB);
+
+    // Check the bitmap; if it is already 1, do nothing.
+    // Otherwise, set the bit, grab the index, and update the buffer.
+ Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, FuncId)};
+ Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
+ LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
+ entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
+ Value *IsNotExecuted =
+ entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
+ entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
+
+ // Fill up UpdateOrderFileBB: grab the index, update the buffer!
+ Value *IdxVal = updateB.CreateAtomicRMW(
+ AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
+ AtomicOrdering::SequentiallyConsistent);
+ // We need to wrap around the index to fit it inside the buffer.
+ Value *WrappedIdx = updateB.CreateAnd(
+ IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
+ Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
+ Value *BufferAddr =
+ updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
+ updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
+ BufferAddr);
+ updateB.CreateBr(OrigEntry);
+ }
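The IR built above amounts to the following per-function-entry logic, written as ordinary C++ for readability. The buffer size and mask stand in for the INSTR_ORDER_FILE_BUFFER_SIZE / INSTR_ORDER_FILE_BUFFER_MASK macros and are assumptions here:

#include <atomic>
#include <cstdint>

constexpr uint32_t kBufferSize = 1 << 17;     // assumed; the real value comes from the macro
constexpr uint32_t kBufferMask = kBufferSize - 1;

extern uint64_t OrderFileBuffer[kBufferSize]; // MD5 hashes, in first-execution order
extern std::atomic<uint32_t> BufferIdx;       // next free slot (wraps around)
extern uint8_t BitMap[];                      // one byte per defined function

void onFunctionEntry(uint32_t FuncId, uint64_t FuncNameMD5) {
  const uint8_t Seen = BitMap[FuncId];
  BitMap[FuncId] = 1;                         // set unconditionally, as in the IR
  if (Seen)
    return;                                   // already recorded once
  const uint32_t Idx = BufferIdx.fetch_add(1) & kBufferMask;
  OrderFileBuffer[Idx] = FuncNameMD5;         // append this function's MD5 hash
}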
+
+ bool run(Module &M) {
+ createOrderFileData(M);
+
+ int FuncId = 0;
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ generateCodeSequence(M, F, FuncId);
+ ++FuncId;
+ }
+
+ return true;
+ }
+
+}; // End of InstrOrderFile struct
+
+class InstrOrderFileLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ InstrOrderFileLegacyPass() : ModulePass(ID) {
+ initializeInstrOrderFileLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // End anonymous namespace
+
+bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ return InstrOrderFile().run(M);
+}
+
+PreservedAnalyses
+InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (InstrOrderFile().run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+
+char InstrOrderFileLegacyPass::ID = 0;
+
+ModulePass *llvm::createInstrOrderFilePass() {
+ return new InstrOrderFileLegacyPass();
+}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 15b94388cbe5..63c2b8078967 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1,9 +1,8 @@
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
@@ -148,8 +149,8 @@ public:
static char ID;
InstrProfilingLegacyPass() : ModulePass(ID) {}
- InstrProfilingLegacyPass(const InstrProfOptions &Options)
- : ModulePass(ID), InstrProf(Options) {}
+ InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
+ : ModulePass(ID), InstrProf(Options, IsCS) {}
StringRef getPassName() const override {
return "Frontend instrumentation-based coverage lowering";
@@ -187,7 +188,7 @@ public:
SSA.AddAvailableValue(PH, Init);
}
- void doExtraRewritesBeforeFinalDeletion() const override {
+ void doExtraRewritesBeforeFinalDeletion() override {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
Instruction *InsertPos = InsertPts[i];
@@ -196,6 +197,7 @@ public:
// block.
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+ Type *Ty = LiveInValue->getType();
IRBuilder<> Builder(InsertPos);
if (AtomicCounterUpdatePromoted)
        // atomic update currently can only be promoted across the current
@@ -203,7 +205,7 @@ public:
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
AtomicOrdering::SequentiallyConsistent);
else {
- LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
+ LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
auto *NewStore = Builder.CreateStore(NewVal, Addr);
@@ -232,9 +234,9 @@ class PGOCounterPromoter {
public:
PGOCounterPromoter(
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
- Loop &CurLoop, LoopInfo &LI)
+ Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
: LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
- LI(LI) {
+ LI(LI), BFI(BFI) {
SmallVector<BasicBlock *, 8> LoopExitBlocks;
SmallPtrSet<BasicBlock *, 8> BlockSet;
@@ -263,6 +265,20 @@ public:
SSAUpdater SSA(&NewPHIs);
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+ // If BFI is set, we will use it to guide the promotions.
+ if (BFI) {
+ auto *BB = Cand.first->getParent();
+ auto InstrCount = BFI->getBlockProfileCount(BB);
+ if (!InstrCount)
+ continue;
+ auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
+ // If the average loop trip count is not greater than 1.5, we skip
+ // promotion.
+ if (PreheaderCount &&
+ (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+ continue;
+ }
+
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
L.getLoopPreheader(), ExitBlocks,
InsertPts, LoopToCandidates, LI);
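The skip condition in the BFI guard above encodes the 1.5 trip-count threshold without floating point: the average trip count is InstrCount / PreheaderCount, and trip count <= 1.5 is equivalent to PreheaderCount * 3 >= InstrCount * 2. A quick numeric check with made-up counts:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t PreheaderCount = 100, InstrCount = 140;   // average trip count 1.4
  const bool Skip = PreheaderCount * 3 >= InstrCount * 2;  // 300 >= 280 -> skip
  std::printf("trip count %.2f -> %s\n",
              double(InstrCount) / double(PreheaderCount),
              Skip ? "skip promotion" : "promote");
  return 0;
}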
@@ -312,6 +328,11 @@ private:
SmallVector<BasicBlock *, 8> ExitingBlocks;
LP->getExitingBlocks(ExitingBlocks);
+
+ // If BFI is set, we do more aggressive promotions based on BFI.
+ if (BFI)
+ return (unsigned)-1;
+
    // Not considered speculative.
if (ExitingBlocks.size() == 1)
return MaxNumOfPromotionsPerLoop;
@@ -343,6 +364,7 @@ private:
SmallVector<Instruction *, 8> InsertPts;
Loop &L;
LoopInfo &LI;
+ BlockFrequencyInfo *BFI;
};
} // end anonymous namespace
@@ -365,8 +387,9 @@ INITIALIZE_PASS_END(
"Frontend instrumentation-based coverage lowering.", false, false)
ModulePass *
-llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
- return new InstrProfilingLegacyPass(Options);
+llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
+ bool IsCS) {
+ return new InstrProfilingLegacyPass(Options, IsCS);
}
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
@@ -415,6 +438,13 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
LoopInfo LI(DT);
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ if (Options.UseBFIInPromotion) {
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
+ BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
+ }
+
for (const auto &LoadStore : PromotionCandidates) {
auto *CounterLoad = LoadStore.first;
auto *CounterStore = LoadStore.second;
@@ -430,7 +460,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
// Do a post-order traversal of the loops so that counter updates can be
// iteratively hoisted outside the loop nest.
for (auto *Loop : llvm::reverse(Loops)) {
- PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
+ PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
Promoter.run(&TotalCountersPromoted);
}
}
@@ -509,13 +539,16 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
return true;
}
-static Constant *getOrInsertValueProfilingCall(Module &M,
- const TargetLibraryInfo &TLI,
- bool IsRange = false) {
+static FunctionCallee
+getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
+ bool IsRange = false) {
LLVMContext &Ctx = M.getContext();
auto *ReturnTy = Type::getVoidTy(M.getContext());
- Constant *Res;
+ AttributeList AL;
+ if (auto AK = TLI.getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(M.getContext(), 2, AK);
+
if (!IsRange) {
Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
@@ -523,8 +556,8 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
};
auto *ValueProfilingCallTy =
FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
- Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
- ValueProfilingCallTy);
+ return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy, AL);
} else {
Type *RangeParamTypes[] = {
#define VALUE_RANGE_PROF 1
@@ -534,15 +567,9 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
};
auto *ValueRangeProfilingCallTy =
FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
- Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
- ValueRangeProfilingCallTy);
+ return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
+ ValueRangeProfilingCallTy, AL);
}
-
- if (Function *FunRes = dyn_cast<Function>(Res)) {
- if (auto AK = TLI.getExtAttrForI32Param(false))
- FunRes->addParamAttr(2, AK);
- }
- return Res;
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
@@ -601,13 +628,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
- Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
+ Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
+ Counters, 0, Index);
if (Options.Atomic || AtomicCounterUpdateAll) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
AtomicOrdering::Monotonic);
} else {
- Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ Value *IncStep = Inc->getStep();
+ Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
auto *Count = Builder.CreateAdd(Load, Inc->getStep());
auto *Store = Builder.CreateStore(Count, Addr);
if (isCounterPromotionEnabled())
@@ -678,32 +707,14 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
-static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
- InstrProfIncrementInst *Inc) {
- if (!needsComdatForCounter(F, M))
- return nullptr;
-
- // COFF format requires a COMDAT section to have a key symbol with the same
- // name. The linker targeting COFF also requires that the COMDAT
- // a section is associated to must precede the associating section. For this
- // reason, we must choose the counter var's name as the name of the comdat.
- StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
- ? getInstrProfCountersVarPrefix()
- : getInstrProfComdatPrefix());
- return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
-}
-
-static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
+static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
// Don't do this for Darwin. compiler-rt uses linker magic.
- if (Triple(M.getTargetTriple()).isOSDarwin())
+ if (TT.isOSDarwin())
return false;
-
// Use linker script magic to get data/cnts/name start/end.
- if (Triple(M.getTargetTriple()).isOSLinux() ||
- Triple(M.getTargetTriple()).isOSFreeBSD() ||
- Triple(M.getTargetTriple()).isOSNetBSD() ||
- Triple(M.getTargetTriple()).isOSFuchsia() ||
- Triple(M.getTargetTriple()).isPS4CPU())
+ if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
+ TT.isOSWindows())
return false;
return true;
@@ -720,13 +731,37 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
PD = It->second;
}
- // Move the name variable to the right section. Place them in a COMDAT group
- // if the associated function is a COMDAT. This will make sure that
- // only one copy of counters of the COMDAT function will be emitted after
- // linking.
+ // Match the linkage and visibility of the name global, except on COFF, where
+  // the linkage must be local and consequently the visibility must be
+ // default.
Function *Fn = Inc->getParent()->getParent();
- Comdat *ProfileVarsComdat = nullptr;
- ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
+ GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+ GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ if (TT.isOSBinFormatCOFF()) {
+ Linkage = GlobalValue::InternalLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
+
+ // Move the name variable to the right section. Place them in a COMDAT group
+ // if the associated function is a COMDAT. This will make sure that only one
+ // copy of counters of the COMDAT function will be emitted after linking. Keep
+ // in mind that this pass may run before the inliner, so we need to create a
+ // new comdat group for the counters and profiling data. If we use the comdat
+ // of the parent function, that will result in relocations against discarded
+ // sections.
+ Comdat *Cmdt = nullptr;
+ GlobalValue::LinkageTypes CounterLinkage = Linkage;
+ if (needsComdatForCounter(*Fn, *M)) {
+ StringRef CmdtPrefix = getInstrProfComdatPrefix();
+ if (TT.isOSBinFormatCOFF()) {
+ // For COFF, the comdat group name must be the name of a symbol in the
+ // group. Use the counter variable name, and upgrade its linkage to
+ // something externally visible, like linkonce_odr.
+ CmdtPrefix = getInstrProfCountersVarPrefix();
+ CounterLinkage = GlobalValue::LinkOnceODRLinkage;
+ }
+ Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix));
+ }
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
@@ -734,20 +769,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// Create the counters variable.
auto *CounterPtr =
- new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+ new GlobalVariable(*M, CounterTy, false, Linkage,
Constant::getNullValue(CounterTy),
getVarName(Inc, getInstrProfCountersVarPrefix()));
- CounterPtr->setVisibility(NamePtr->getVisibility());
+ CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
CounterPtr->setAlignment(8);
- CounterPtr->setComdat(ProfileVarsComdat);
+ CounterPtr->setComdat(Cmdt);
+ CounterPtr->setLinkage(CounterLinkage);
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
// the current function.
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
- if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
+ if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
uint64_t NS = 0;
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
NS += PD.NumValueSites[Kind];
@@ -755,14 +791,14 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
auto *ValuesVar =
- new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
+ new GlobalVariable(*M, ValuesTy, false, Linkage,
Constant::getNullValue(ValuesTy),
getVarName(Inc, getInstrProfValuesVarPrefix()));
- ValuesVar->setVisibility(NamePtr->getVisibility());
+ ValuesVar->setVisibility(Visibility);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(8);
- ValuesVar->setComdat(ProfileVarsComdat);
+ ValuesVar->setComdat(Cmdt);
ValuesPtrExpr =
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
}
@@ -789,13 +825,13 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
};
- auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
+ auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
ConstantStruct::get(DataTy, DataVals),
getVarName(Inc, getInstrProfDataVarPrefix()));
- Data->setVisibility(NamePtr->getVisibility());
+ Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
- Data->setComdat(ProfileVarsComdat);
+ Data->setComdat(Cmdt);
PD.RegionCounters = CounterPtr;
PD.DataVar = Data;
@@ -820,7 +856,7 @@ void InstrProfiling::emitVNodes() {
// For now only support this on platforms that do
// not require runtime registration to discover
// named section start/end.
- if (needsRuntimeRegistrationOfSectionRange(*M))
+ if (needsRuntimeRegistrationOfSectionRange(TT))
return;
size_t TotalNS = 0;
@@ -881,6 +917,10 @@ void InstrProfiling::emitNameData() {
NamesSize = CompressedNameStr.size();
NamesVar->setSection(
getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+ // On COFF, it's important to reduce the alignment down to 1 to prevent the
+ // linker from inserting padding before the start of the names section or
+ // between names entries.
+ NamesVar->setAlignment(1);
UsedVars.push_back(NamesVar);
for (auto *NamePtr : ReferencedNames)
@@ -888,7 +928,7 @@ void InstrProfiling::emitNameData() {
}
void InstrProfiling::emitRegistration() {
- if (!needsRuntimeRegistrationOfSectionRange(*M))
+ if (!needsRuntimeRegistrationOfSectionRange(TT))
return;
// Construct the function.
@@ -929,7 +969,7 @@ void InstrProfiling::emitRegistration() {
bool InstrProfiling::emitRuntimeHook() {
// We expect the linker to be invoked with -u<hook_var> flag for linux,
// for which case there is no need to emit the user function.
- if (Triple(M->getTargetTriple()).isOSLinux())
+ if (TT.isOSLinux())
return false;
// If the module's provided its own runtime, we don't need to do anything.
@@ -950,11 +990,11 @@ bool InstrProfiling::emitRuntimeHook() {
if (Options.NoRedZone)
User->addFnAttr(Attribute::NoRedZone);
User->setVisibility(GlobalValue::HiddenVisibility);
- if (Triple(M->getTargetTriple()).supportsCOMDAT())
+ if (TT.supportsCOMDAT())
User->setComdat(M->getOrInsertComdat(User->getName()));
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
- auto *Load = IRB.CreateLoad(Var);
+ auto *Load = IRB.CreateLoad(Int32Ty, Var);
IRB.CreateRet(Load);
// Mark the user variable as used so that it isn't stripped out.
@@ -968,23 +1008,13 @@ void InstrProfiling::emitUses() {
}
void InstrProfiling::emitInitialization() {
- StringRef InstrProfileOutput = Options.InstrProfileOutput;
-
- if (!InstrProfileOutput.empty()) {
- // Create variable for profile name.
- Constant *ProfileNameConst =
- ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
- GlobalVariable *ProfileNameVar = new GlobalVariable(
- *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
- ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
- if (TT.supportsCOMDAT()) {
- ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
- ProfileNameVar->setComdat(M->getOrInsertComdat(
- StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
- }
- }
-
- Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+  // Create the ProfileFileName variable. Don't do this for the
+  // context-sensitive instrumentation lowering: that lowering happens after
+  // LTO/ThinLTO linking, and the PGOInstrumentationGenCreateVar pass should
+  // have already created the variable before LTO/ThinLTO linking.
+ if (!IsCS)
+ createProfileFileNameVar(*M, Options.InstrProfileOutput);
+ Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
if (!RegisterF)
return;
@@ -1000,8 +1030,7 @@ void InstrProfiling::emitInitialization() {
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
- if (RegisterF)
- IRB.CreateCall(RegisterF, {});
+ IRB.CreateCall(RegisterF, {});
IRB.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index c3e323613c70..f56a1bd91b89 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -1,9 +1,8 @@
//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,10 +24,12 @@ using namespace llvm;
/// Moves I before IP. Returns new insert point.
static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
// If I is IP, move the insert point down.
- if (I == IP)
- return ++IP;
- // Otherwise, move I before IP and return IP.
- I->moveBefore(&*IP);
+ if (I == IP) {
+ ++IP;
+ } else {
+ // Otherwise, move I before IP and return IP.
+ I->moveBefore(&*IP);
+ }
return IP;
}
@@ -101,8 +102,8 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
- initializeAddressSanitizerPass(Registry);
- initializeAddressSanitizerModulePass(Registry);
+ initializeAddressSanitizerLegacyPassPass(Registry);
+ initializeModuleAddressSanitizerLegacyPassPass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
@@ -110,13 +111,13 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
+ initializeInstrOrderFileLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
initializeMemorySanitizerLegacyPassPass(Registry);
- initializeHWAddressSanitizerPass(Registry);
+ initializeHWAddressSanitizerLegacyPassPass(Registry);
initializeThreadSanitizerLegacyPassPass(Registry);
initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
- initializeEfficiencySanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 4eb758c69c58..892a6a26da91 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,8 +67,7 @@ namespace llvm {
/// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
-
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
+ llvm::stable_sort(EdgeVector, EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index e6573af2077d..b25cbed1bb02 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1,9 +1,8 @@
//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,6 +143,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -248,6 +248,13 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
cl::desc("exact handling of relational integer ICmp"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClHandleLifetimeIntrinsics(
+ "msan-handle-lifetime-intrinsics",
+ cl::desc(
+ "when possible, poison scoped variables at the beginning of the scope "
+ "(slower, but more precise)"),
+ cl::Hidden, cl::init(true));
+
// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
@@ -305,22 +312,23 @@ static cl::opt<bool> ClWithComdat("msan-with-comdat",
// These options allow to specify custom memory map parameters
// See MemoryMapParams for details.
-static cl::opt<unsigned long long> ClAndMask("msan-and-mask",
- cl::desc("Define custom MSan AndMask"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClAndMask("msan-and-mask",
+ cl::desc("Define custom MSan AndMask"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClXorMask("msan-xor-mask",
- cl::desc("Define custom MSan XorMask"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
+ cl::desc("Define custom MSan XorMask"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClShadowBase("msan-shadow-base",
- cl::desc("Define custom MSan ShadowBase"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
+ cl::desc("Define custom MSan ShadowBase"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
- cl::desc("Define custom MSan OriginBase"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
+ cl::desc("Define custom MSan OriginBase"),
+ cl::Hidden, cl::init(0));
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
namespace {
@@ -454,17 +462,16 @@ namespace {
/// the module.
class MemorySanitizer {
public:
- MemorySanitizer(Module &M, int TrackOrigins = 0, bool Recover = false,
- bool EnableKmsan = false) {
+ MemorySanitizer(Module &M, MemorySanitizerOptions Options) {
this->CompileKernel =
- ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : EnableKmsan;
+ ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel;
if (ClTrackOrigins.getNumOccurrences() > 0)
this->TrackOrigins = ClTrackOrigins;
else
- this->TrackOrigins = this->CompileKernel ? 2 : TrackOrigins;
+ this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins;
this->Recover = ClKeepGoing.getNumOccurrences() > 0
? ClKeepGoing
- : (this->CompileKernel | Recover);
+ : (this->CompileKernel | Options.Recover);
initializeModule(M);
}
@@ -536,41 +543,42 @@ private:
bool CallbacksInitialized = false;
/// The run-time callback to print a warning.
- Value *WarningFn;
+ FunctionCallee WarningFn;
// These arrays are indexed by log2(AccessSize).
- Value *MaybeWarningFn[kNumberOfAccessSizes];
- Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
/// Run-time helper that generates a new origin value for a stack
/// allocation.
- Value *MsanSetAllocaOrigin4Fn;
+ FunctionCallee MsanSetAllocaOrigin4Fn;
/// Run-time helper that poisons stack on function entry.
- Value *MsanPoisonStackFn;
+ FunctionCallee MsanPoisonStackFn;
/// Run-time helper that records a store (or any event) of an
/// uninitialized value and returns an updated origin id encoding this info.
- Value *MsanChainOriginFn;
+ FunctionCallee MsanChainOriginFn;
/// MSan runtime replacements for memmove, memcpy and memset.
- Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
/// KMSAN callback for task-local function argument shadow.
- Value *MsanGetContextStateFn;
+ StructType *MsanContextStateTy;
+ FunctionCallee MsanGetContextStateFn;
/// Functions for poisoning/unpoisoning local variables
- Value *MsanPoisonAllocaFn, *MsanUnpoisonAllocaFn;
+ FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
/// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
/// pointers.
- Value *MsanMetadataPtrForLoadN, *MsanMetadataPtrForStoreN;
- Value *MsanMetadataPtrForLoad_1_8[4];
- Value *MsanMetadataPtrForStore_1_8[4];
- Value *MsanInstrumentAsmStoreFn;
+ FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
+ FunctionCallee MsanMetadataPtrForLoad_1_8[4];
+ FunctionCallee MsanMetadataPtrForStore_1_8[4];
+ FunctionCallee MsanInstrumentAsmStoreFn;
/// Helper to choose between different MsanMetadataPtrXxx().
- Value *getKmsanShadowOriginAccessFn(bool isStore, int size);
+ FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
/// Memory map parameters used in application-to-shadow calculation.
const MemoryMapParams *MapParams;
@@ -586,6 +594,8 @@ private:
/// An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+
+ Function *MsanCtorFunction;
};
/// A legacy function pass for msan instrumentation.
@@ -595,10 +605,8 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
// Pass identification, replacement for typeid.
static char ID;
- MemorySanitizerLegacyPass(int TrackOrigins = 0, bool Recover = false,
- bool EnableKmsan = false)
- : FunctionPass(ID), TrackOrigins(TrackOrigins), Recover(Recover),
- EnableKmsan(EnableKmsan) {}
+ MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
+ : FunctionPass(ID), Options(Options) {}
StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -612,16 +620,14 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
bool doInitialization(Module &M) override;
Optional<MemorySanitizer> MSan;
- int TrackOrigins;
- bool Recover;
- bool EnableKmsan;
+ MemorySanitizerOptions Options;
};
} // end anonymous namespace
PreservedAnalyses MemorySanitizerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
- MemorySanitizer Msan(*F.getParent(), TrackOrigins, Recover, EnableKmsan);
+ MemorySanitizer Msan(*F.getParent(), Options);
if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -637,10 +643,9 @@ INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
"MemorySanitizer: detects uninitialized reads.", false,
false)
-FunctionPass *llvm::createMemorySanitizerLegacyPassPass(int TrackOrigins,
- bool Recover,
- bool CompileKernel) {
- return new MemorySanitizerLegacyPass(TrackOrigins, Recover, CompileKernel);
+FunctionPass *
+llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
+ return new MemorySanitizerLegacyPass(Options);
}
/// Create a non-const global initialized with the given string.
@@ -675,18 +680,15 @@ void MemorySanitizer::createKernelApi(Module &M) {
IRB.getInt32Ty());
// Requests the per-task context state (kmsan_context_state*) from the
// runtime library.
+ MsanContextStateTy = StructType::get(
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
+ IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+ OriginTy);
MsanGetContextStateFn = M.getOrInsertFunction(
- "__msan_get_context_state",
- PointerType::get(
- StructType::get(ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(),
- kParamTLSSize / 8), /* va_arg_origin */
- IRB.getInt64Ty(),
- ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
- OriginTy),
- 0));
+ "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
PointerType::get(IRB.getInt32Ty(), 0));
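The struct type assembled above mirrors, field for field, the per-task state that __msan_get_context_state returns. As a rough standalone C++ sketch of that layout (the kParamTLSSize/kRetvalTLSSize values below are assumptions standing in for the constants defined earlier in this file, and the field names are hypothetical, chosen only to match the GEP names used later in the KMSAN prologue):

    #include <cstdint>

    constexpr unsigned kParamTLSSize = 800;  // assumed value of the real constant
    constexpr unsigned kRetvalTLSSize = 800; // assumed value of the real constant

    struct KmsanContextState {
      uint64_t ParamShadow[kParamTLSSize / 8];   // GEP index 0: param_shadow
      uint64_t RetvalShadow[kRetvalTLSSize / 8]; // GEP index 1: retval_shadow
      uint64_t VAArgShadow[kParamTLSSize / 8];   // GEP index 2: va_arg_shadow
      uint64_t VAArgOrigin[kParamTLSSize / 8];   // GEP index 3: va_arg_origin
      uint64_t VAArgOverflowSize;                // GEP index 4: va_arg_overflow_size
      uint32_t ParamOrigin[kParamTLSSize / 4];   // GEP index 5: param_origin
      uint32_t RetvalOrigin;                     // GEP index 6: retval_origin
      uint32_t Origin;                           // last field, not addressed by those GEPs
    };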
@@ -821,8 +823,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
CallbacksInitialized = true;
}
-Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
- Value **Fns =
+FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
+ int size) {
+ FunctionCallee *Fns =
isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
switch (size) {
case 1:
@@ -839,6 +842,8 @@ Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
}
/// Module-level initialization.
+///
+/// Inserts a call to __msan_init into the module's constructor list.
void MemorySanitizer::initializeModule(Module &M) {
auto &DL = M.getDataLayout();
@@ -913,7 +918,22 @@ void MemorySanitizer::initializeModule(Module &M) {
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
if (!CompileKernel) {
- getOrCreateInitFunction(M, kMsanInitName);
+ std::tie(MsanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ if (!ClWithComdat) {
+ appendToGlobalCtors(M, Ctor, 0);
+ return;
+ }
+ Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+ Ctor->setComdat(MsanCtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
if (TrackOrigins)
M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
@@ -932,7 +952,7 @@ void MemorySanitizer::initializeModule(Module &M) {
}
bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
- MSan.emplace(M, TrackOrigins, Recover, EnableKmsan);
+ MSan.emplace(M, Options);
return true;
}
@@ -1011,6 +1031,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Shadow(S), Origin(O), OrigIns(I) {}
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+ bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
+ SmallSet<AllocaInst *, 16> AllocaSet;
+ SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
SmallVector<StoreInst *, 16> StoreList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
@@ -1076,7 +1099,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
Value *GEP =
- i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
+ i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
CurrentAlignment = kMinOriginAlignment;
}
@@ -1104,7 +1127,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
IRB.CreateCall(Fn, {ConvertedShadow2,
@@ -1186,7 +1209,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- Value *Fn = MS.MaybeWarningFn[SizeIndex];
+ FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
@@ -1221,20 +1244,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
Constant *Zero = IRB.getInt32(0);
- MS.ParamTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(0)}, "param_shadow");
- MS.RetvalTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(1)}, "retval_shadow");
- MS.VAArgTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(2)}, "va_arg_shadow");
- MS.VAArgOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(3)}, "va_arg_origin");
- MS.VAArgOverflowSizeTLS = IRB.CreateGEP(
- ContextState, {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
- MS.ParamOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(5)}, "param_origin");
+ MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(0)}, "param_shadow");
+ MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(1)}, "retval_shadow");
+ MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+ MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(3)}, "va_arg_origin");
+ MS.VAArgOverflowSizeTLS =
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+ MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(5)}, "param_origin");
MS.RetvalOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(6)}, "retval_origin");
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(6)}, "retval_origin");
return ret;
}
@@ -1265,6 +1290,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->finalizeInstrumentation();
+ // Poison allocas at their llvm.lifetime.start calls, unless we have fallen
+ // back to instrumenting only the allocas themselves.
+ if (InstrumentLifetimeStart) {
+ for (auto Item : LifetimeStartList) {
+ instrumentAlloca(*Item.second, Item.first);
+ AllocaSet.erase(Item.second);
+ }
+ }
+ // Poison the allocas for which we didn't instrument the corresponding
+ // lifetime intrinsics.
+ for (AllocaInst *AI : AllocaSet)
+ instrumentAlloca(*AI);
+
bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
InstrumentationList.size() + StoreList.size() >
(unsigned)ClInstrumentationWithCallThreshold;
@@ -1381,7 +1419,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
}
OriginPtr =
- IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0));
+ IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
}
return std::make_pair(ShadowPtr, OriginPtr);
}
@@ -1393,7 +1431,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
int Size = DL.getTypeStoreSize(ShadowTy);
- Value *Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+ FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
Value *AddrCast =
IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
if (Getter) {
@@ -1598,8 +1636,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ParamTLS overflow.
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr =
- EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+ kShadowTLSAlignment);
}
}
LLVM_DEBUG(dbgs()
@@ -1607,7 +1645,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins && !Overflow) {
Value *OriginPtr =
getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
- setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+ setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
} else {
setOrigin(A, getCleanOrigin());
}
@@ -1738,7 +1776,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (PropagateShadow) {
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -1752,7 +1791,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
if (PropagateShadow) {
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
- setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));
+ setOrigin(
+ &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
} else {
setOrigin(&I, getCleanOrigin());
}
@@ -1903,7 +1943,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S1S2 = IRB.CreateAnd(S1, S2);
Value *V1S2 = IRB.CreateAnd(V1, S2);
Value *S1V2 = IRB.CreateAnd(S1, V2);
- setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
setOriginForNaryOp(I);
}
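The three-way OR produced here (and in the next hunk) is the usual MSan propagation rule for bitwise AND: a result bit is poisoned if it is poisoned in both operands, or poisoned in one operand while the other operand's value bit is 1 (the OR visitor presumably feeds the same expression inverted operand values). A minimal scalar sketch of the rule, with a set shadow bit meaning "poisoned":

    #include <cstdint>

    // Shadow of (V1 & V2), given value bits V1, V2 and shadow bits S1, S2.
    uint32_t andShadow(uint32_t V1, uint32_t S1, uint32_t V2, uint32_t S2) {
      return (S1 & S2) | (V1 & S2) | (S1 & V2);
    }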
@@ -1925,7 +1965,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S1S2 = IRB.CreateAnd(S1, S2);
Value *V1S2 = IRB.CreateAnd(V1, S2);
Value *S1V2 = IRB.CreateAnd(S1, V2);
- setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
setOriginForNaryOp(I);
}
@@ -2070,6 +2110,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SC.Done(&I);
}
+ void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
+
// Handle multiplication by constant.
//
// Handle a special case of multiplication by constant that may have one or
@@ -2432,7 +2474,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned Alignment = 1;
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -2442,7 +2485,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
if (PropagateShadow)
- setOrigin(&I, IRB.CreateLoad(OriginPtr));
+ setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
else
setOrigin(&I, getCleanOrigin());
}
@@ -2519,6 +2562,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return false;
}
+ void handleLifetimeStart(IntrinsicInst &I) {
+ if (!PoisonStack)
+ return;
+ DenseMap<Value *, AllocaInst *> AllocaForValue;
+ AllocaInst *AI =
+ llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
+ if (!AI)
+ InstrumentLifetimeStart = false;
+ LifetimeStartList.push_back(std::make_pair(&I, AI));
+ }
+
void handleBswap(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Op = I.getArgOperand(0);
@@ -2650,7 +2704,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
Value *V1 = I.getOperand(0);
Value *V2 = I.getOperand(1);
- Value *Shift = IRB.CreateCall(I.getCalledValue(),
+ Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
{IRB.CreateBitCast(S1, V1->getType()), V2});
Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
setShadow(&I, IRB.CreateOr(Shift, S2Conv));
@@ -2660,6 +2714,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Get an X86_MMX-sized vector type.
Type *getMMXVectorTy(unsigned EltSizeInBits) {
const unsigned X86_MMXSizeInBits = 64;
+ assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
+ "Illegal MMX vector element size");
return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
X86_MMXSizeInBits / EltSizeInBits);
}
@@ -2825,9 +2881,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr");
- Value *Origin =
- MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin();
+ Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+ Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
+ : getCleanOrigin();
insertShadowCheck(Shadow, Origin, &I);
}
@@ -2901,7 +2957,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Origin = IRB.CreateSelect(
IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
- getOrigin(PassThru), IRB.CreateLoad(OriginPtr));
+ getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
setOrigin(&I, Origin);
} else {
@@ -2911,9 +2967,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return true;
}
+ // Instrument BMI / BMI2 intrinsics.
+ // All of these intrinsics are Z = I(X, Y), where the types of all operands
+ // and the result match, and are either i32 or i64. The following
+ // instrumentation happens to work for all of them:
+ //   Sz = I(Sx, Y) | (sext (Sy != 0))
+ void handleBmiIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+
+ // If any bit of the mask operand is poisoned, then the whole thing is.
+ Value *SMask = getShadow(&I, 1);
+ SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
+ ShadowTy);
+ // Apply the same intrinsic to the shadow of the first operand.
+ Value *S = IRB.CreateCall(I.getCalledFunction(),
+ {getShadow(&I, 0), I.getOperand(1)});
+ S = IRB.CreateOr(SMask, S);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
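As a standalone illustration of the rule stated in the comment above, here is a scalar model of the shadow computation. It is not part of the patch: pext32 is a hypothetical stand-in for one of the BMI intrinsics I(X, Y), and bmiShadow mirrors what handleBmiIntrinsic emits in IR.

    #include <cstdint>

    // Hand-rolled parallel bit extract, standing in for one BMI operation I(X, Y).
    static uint32_t pext32(uint32_t X, uint32_t Mask) {
      uint32_t Result = 0;
      for (uint32_t Bit = 0; Mask != 0; Mask &= Mask - 1, ++Bit)
        Result |= ((X >> __builtin_ctz(Mask)) & 1u) << Bit;
      return Result;
    }

    // Sz = I(Sx, Y) | (sext (Sy != 0)): apply I to the first operand's shadow,
    // then saturate the result if any bit of the second operand is poisoned.
    static uint32_t bmiShadow(uint32_t Sx, uint32_t Sy, uint32_t Y) {
      uint32_t SMask = (Sy != 0) ? ~0u : 0u;
      return pext32(Sx, Y) | SMask;
    }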
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ handleLifetimeStart(I);
+ break;
case Intrinsic::bswap:
handleBswap(I);
break;
@@ -3127,6 +3206,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorComparePackedIntrinsic(I);
break;
+ case Intrinsic::x86_bmi_bextr_32:
+ case Intrinsic::x86_bmi_bextr_64:
+ case Intrinsic::x86_bmi_bzhi_32:
+ case Intrinsic::x86_bmi_bzhi_64:
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+ handleBmiIntrinsic(I);
+ break;
+
case Intrinsic::is_constant:
// The result of llvm.is.constant() is always defined.
setShadow(&I, getCleanShadow(&I));
@@ -3143,21 +3233,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
assert(!I.getMetadata("nosanitize"));
- assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+ assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) &&
+ "Unknown type of CallSite");
+ if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) {
+ // For inline asm (either a call to an asm function or a callbr instruction),
+ // do the usual thing: check argument shadow and mark all outputs as
+ // clean. Note that any side effects of the inline asm that are not
+ // immediately visible in its constraints are not handled.
+ if (ClHandleAsmConservative && MS.CompileKernel)
+ visitAsmInstruction(I);
+ else
+ visitInstruction(I);
+ return;
+ }
if (CS.isCall()) {
CallInst *Call = cast<CallInst>(&I);
-
- // For inline asm, do the usual thing: check argument shadow and mark all
- // outputs as clean. Note that any side effects of the inline asm that are
- // not immediately visible in its constraints are not handled.
- if (Call->isInlineAsm()) {
- if (ClHandleAsmConservative && MS.CompileKernel)
- visitAsmInstruction(I);
- else
- visitInstruction(I);
- return;
- }
-
assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
// We are going to insert code that relies on the fact that the callee
@@ -3264,12 +3354,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"Could not find insertion point for retval shadow load");
}
IRBuilder<> IRBAfter(&*NextInsn);
- Value *RetvalShadow =
- IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
- kShadowTLSAlignment, "_msret");
+ Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
+ getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
setShadow(&I, RetvalShadow);
if (MS.TrackOrigins)
- setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+ setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
+ getOriginPtrForRetval(IRBAfter)));
}
bool isAMustTailRetVal(Value *RetVal) {
@@ -3330,7 +3421,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StackDescription.str());
}
- void instrumentAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall(MS.MsanPoisonStackFn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
@@ -3352,7 +3443,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
- void instrumentAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
Value *Descr = getLocalVarDescription(I);
if (PoisonStack) {
IRB.CreateCall(MS.MsanPoisonAllocaFn,
@@ -3364,10 +3455,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
- void visitAllocaInst(AllocaInst &I) {
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(I.getNextNode());
+ void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
+ if (!InsPoint)
+ InsPoint = &I;
+ IRBuilder<> IRB(InsPoint->getNextNode());
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
@@ -3375,9 +3466,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Len = IRB.CreateMul(Len, I.getArraySize());
if (MS.CompileKernel)
- instrumentAllocaKmsan(I, IRB, Len);
+ poisonAllocaKmsan(I, IRB, Len);
else
- instrumentAllocaUserspace(I, IRB, Len);
+ poisonAllocaUserspace(I, IRB, Len);
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ // We'll get to this alloca later unless it's poisoned at the corresponding
+ // llvm.lifetime.start.
+ AllocaSet.insert(&I);
}
void visitSelectInst(SelectInst& I) {
@@ -3409,7 +3508,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
D = CreateAppToShadowCast(IRB, D);
// Result shadow if condition shadow is 1.
- Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
+ Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
}
Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
setShadow(&I, Sa);
@@ -3525,10 +3624,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
/// Get the number of output arguments returned by pointers.
- int getNumOutputArgs(InlineAsm *IA, CallInst *CI) {
+ int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
int NumRetOutputs = 0;
int NumOutputs = 0;
- Type *RetTy = dyn_cast<Value>(CI)->getType();
+ Type *RetTy = dyn_cast<Value>(CB)->getType();
if (!RetTy->isVoidTy()) {
// Register outputs are returned via the CallInst return value.
StructType *ST = dyn_cast_or_null<StructType>(RetTy);
@@ -3568,24 +3667,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// corresponding CallInst has nO+nI+1 operands (the last operand is the
// function to be called).
const DataLayout &DL = F.getParent()->getDataLayout();
- CallInst *CI = dyn_cast<CallInst>(&I);
+ CallBase *CB = dyn_cast<CallBase>(&I);
IRBuilder<> IRB(&I);
- InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- int OutputArgs = getNumOutputArgs(IA, CI);
+ InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
+ int OutputArgs = getNumOutputArgs(IA, CB);
// The last operand of a CallInst is the function itself.
- int NumOperands = CI->getNumOperands() - 1;
+ int NumOperands = CB->getNumOperands() - 1;
// Check input arguments. Doing so before unpoisoning output arguments, so
// that we won't overwrite uninit values before checking them.
for (int i = OutputArgs; i < NumOperands; i++) {
- Value *Operand = CI->getOperand(i);
+ Value *Operand = CB->getOperand(i);
instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
}
// Unpoison output arguments. This must happen before the actual InlineAsm
// call, so that the shadow for memory published in the asm() statement
// remains valid.
for (int i = 0; i < OutputArgs; i++) {
- Value *Operand = CI->getOperand(i);
+ Value *Operand = CB->getOperand(i);
instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
}
@@ -3817,7 +3916,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
VAArgOverflowSize);
@@ -3836,11 +3936,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 16)),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 16;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -3851,11 +3953,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
if (MS.TrackOrigins)
IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
Alignment, AMD64FpEndOffset);
+ Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 8)),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+ PointerType::get(OverflowArgAreaPtrTy, 0));
+ Value *OverflowArgAreaPtr =
+ IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
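The magic offsets 8 and 16 used above for OverflowArgAreaPtrPtr and RegSaveAreaPtrPtr come from the System V x86-64 va_list tag layout; a reference sketch of that layout (per the psABI, shown only to explain the constants):

    #include <cstdint>

    struct Amd64VaListTag {
      uint32_t GpOffset;     // byte offset 0
      uint32_t FpOffset;     // byte offset 4
      void *OverflowArgArea; // byte offset 8  -> OverflowArgAreaPtrPtr
      void *RegSaveArea;     // byte offset 16 -> RegSaveAreaPtrPtr
    };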
@@ -3957,7 +4061,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -3974,10 +4078,12 @@ struct VarArgMIPS64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 8;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4127,7 +4233,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
Type::getInt64PtrTy(*MS.C));
- return IRB.CreateLoad(SaveAreaPtrPtr);
+ return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
}
// Retrieve a va_list field of 'int' size.
@@ -4137,7 +4243,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
Type::getInt32PtrTy(*MS.C));
- Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+ Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
}
@@ -4148,7 +4254,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
VAArgOverflowSize);
@@ -4391,7 +4498,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -4408,10 +4515,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 8;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4458,6 +4567,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
}
bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+ if (!CompileKernel && (&F == MsanCtorFunction))
+ return false;
MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index f043325f5bba..6fec3c9c79ee 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1,9 +1,8 @@
//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,7 +47,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "CFGMST.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -66,6 +64,7 @@
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -107,6 +106,7 @@
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
@@ -133,6 +133,19 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
+STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOSelectInsts,
+ "Number of select instruction instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOMemIntrinsics,
+ "Number of mem intrinsics instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
+STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
+STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
+STATISTIC(NumOfCSPGOFunc,
+ "Number of functions having valid profile counts in CSPGO.");
+STATISTIC(NumOfCSPGOMismatch,
+ "Number of functions having mismatch profile in CSPGO.");
+STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// Command line option to specify the file to read profile from. This is
// mainly used for testing.
@@ -384,7 +397,8 @@ class PGOInstrumentationGenLegacyPass : public ModulePass {
public:
static char ID;
- PGOInstrumentationGenLegacyPass() : ModulePass(ID) {
+ PGOInstrumentationGenLegacyPass(bool IsCS = false)
+ : ModulePass(ID), IsCS(IsCS) {
initializePGOInstrumentationGenLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -392,6 +406,8 @@ public:
StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
private:
+ // Whether this is context-sensitive instrumentation.
+ bool IsCS;
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -404,8 +420,8 @@ public:
static char ID;
// Provide the profile filename as the parameter.
- PGOInstrumentationUseLegacyPass(std::string Filename = "")
- : ModulePass(ID), ProfileFileName(std::move(Filename)) {
+ PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
+ : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
initializePGOInstrumentationUseLegacyPassPass(
@@ -416,14 +432,38 @@ public:
private:
std::string ProfileFileName;
+ // Whether this is context-sensitive instrumentation use.
+ bool IsCS;
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
+class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
+public:
+ static char ID;
+ StringRef getPassName() const override {
+ return "PGOInstrumentationGenCreateVarPass";
+ }
+ PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
+ : ModulePass(ID), InstrProfileOutput(CSInstrName) {
+ initializePGOInstrumentationGenCreateVarLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+private:
+ bool runOnModule(Module &M) override {
+ createProfileFileNameVar(M, InstrProfileOutput);
+ createIRLevelProfileFlagVar(M, true);
+ return false;
+ }
+ std::string InstrProfileOutput;
+};
+
} // end anonymous namespace
char PGOInstrumentationGenLegacyPass::ID = 0;
@@ -435,8 +475,8 @@ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
"PGO instrumentation.", false, false)
-ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {
- return new PGOInstrumentationGenLegacyPass();
+ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
+ return new PGOInstrumentationGenLegacyPass(IsCS);
}
char PGOInstrumentationUseLegacyPass::ID = 0;
@@ -445,11 +485,25 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
"Read PGO instrumentation profile.", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
"Read PGO instrumentation profile.", false, false)
-ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {
- return new PGOInstrumentationUseLegacyPass(Filename.str());
+ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
+ bool IsCS) {
+ return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
+}
+
+char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
+
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
+ "pgo-instr-gen-create-var",
+ "Create PGO instrumentation version variable for CSPGO.", false,
+ false)
+
+ModulePass *
+llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
+ return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
}
namespace {
@@ -490,6 +544,12 @@ struct BBInfo {
const std::string infoString() const {
return (Twine("Index=") + Twine(Index)).str();
}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
};
// This class implements the CFG edges. Note the CFG can be a multi-graph.
@@ -497,6 +557,9 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
private:
Function &F;
+ // Whether this is context-sensitive instrumentation.
+ bool IsCS;
+
// A map that stores the Comdat group in function F.
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
@@ -516,6 +579,10 @@ public:
// The Minimum Spanning Tree of function CFG.
CFGMST<Edge, BBInfo> MST;
+ // Collect all the BBs that will be instrumented, and store them in
+ // InstrumentBBs.
+ void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
// Given an edge, find the BB that will be instrumented.
// Return nullptr if there is no BB to be instrumented.
BasicBlock *getInstrBB(Edge *E);
@@ -536,15 +603,23 @@ public:
Function &Func,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFI = nullptr)
- : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
- SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) {
+ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+ : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
+ ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
+ MST(F, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
MIVisitor.countMemIntrinsics(Func);
- NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
- NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
- ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+ if (!IsCS) {
+ NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfPGOBB += MST.BBInfos.size();
+ ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+ } else {
+ NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfCSPGOBB += MST.BBInfos.size();
+ }
ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
@@ -553,28 +628,17 @@ public:
renameComdatFunction();
LLVM_DEBUG(dumpInfo("after CFGMST"));
- NumOfPGOBB += MST.BBInfos.size();
for (auto &E : MST.AllEdges) {
if (E->Removed)
continue;
- NumOfPGOEdge++;
+ IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
if (!E->InMST)
- NumOfPGOInstrument++;
+ IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
}
if (CreateGlobalVar)
FuncNameVar = createPGOFuncNameVar(F, FuncName);
}
-
- // Return the number of profile counters needed for the function.
- unsigned getNumCounters() {
- unsigned NumCounters = 0;
- for (auto &E : this->MST.AllEdges) {
- if (!E->InMST && !E->Removed)
- NumCounters++;
- }
- return NumCounters + SIVisitor.getNumOfSelectInsts();
- }
};
} // end anonymous namespace
@@ -598,9 +662,17 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
}
JC.update(Indexes);
+
+ // Hash format for the context-sensitive profile. Reserve 4 bits for other
+ // information.
FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
(uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+ //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
(uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ // Reserve bits 60-63 for other information purposes.
+ FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ if (IsCS)
+ NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
@@ -681,6 +753,36 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
}
+// Collect all the BBs that will be instrumented and return them in
+// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+ std::vector<BasicBlock *> &InstrumentBBs) {
+ // Use a worklist as we will update the vector during the iteration.
+ std::vector<Edge *> EdgeList;
+ EdgeList.reserve(MST.AllEdges.size());
+ for (auto &E : MST.AllEdges)
+ EdgeList.push_back(E.get());
+
+ for (auto &E : EdgeList) {
+ BasicBlock *InstrBB = getInstrBB(E);
+ if (InstrBB)
+ InstrumentBBs.push_back(InstrBB);
+ }
+
+ // Set up InEdges/OutEdges for all BBs.
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ BBInfo &SrcInfo = getBBInfo(SrcBB);
+ BBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.addOutEdge(E.get());
+ DestInfo.addInEdge(E.get());
+ }
+}
+
// Given a CFG E to be instrumented, find which BB to place the instrumented
// code. The function will split the critical edge if necessary.
template <class Edge, class BBInfo>
@@ -696,46 +798,64 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (DestBB == nullptr)
return SrcBB;
+ auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
+ // There are basic blocks (such as catchswitch) that cannot be instrumented.
+ // If the returned first insertion point is the end of BB, skip this BB.
+ if (BB->getFirstInsertionPt() == BB->end())
+ return nullptr;
+ return BB;
+ };
+
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
- return SrcBB;
+ return canInstrument(SrcBB);
if (!E->IsCritical)
- return DestBB;
+ return canInstrument(DestBB);
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ if (!InstrBB) {
+ LLVM_DEBUG(
+ dbgs() << "Fail to split critical edge: not instrument this edge.\n");
+ return nullptr;
+ }
// For a critical edge, we have to split. Instrument the newly
// created BB.
- NumOfPGOSplit++;
+ IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
<< " --> " << getBBInfo(DestBB).Index << "\n");
- unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
- BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
- assert(InstrBB && "Critical edge is not split");
-
+ // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+ MST.addEdge(SrcBB, InstrBB, 0);
+ // Second one: Add new edge of InstrBB->DestBB.
+ Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+ NewEdge1.InMST = true;
E->Removed = true;
- return InstrBB;
+
+ return canInstrument(InstrBB);
}
// Visit all edges and instrument the edges not in the MST, and do value profiling.
// Critical edges will be split.
static void instrumentOneFunc(
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
- std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ bool IsCS) {
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
- BFI);
- unsigned NumCounters = FuncInfo.getNumCounters();
+ BFI, IsCS);
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
- for (auto &E : FuncInfo.MST.AllEdges) {
- BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
- if (!InstrBB)
- continue;
-
+ for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
@@ -831,6 +951,18 @@ struct UseBBInfo : public BBInfo {
return BBInfo::infoString();
return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
}
+
+ // Add an OutEdge and update the edge count.
+ void addOutEdge(PGOUseEdge *E) {
+ OutEdges.push_back(E);
+ UnknownCountOutEdge++;
+ }
+
+ // Add an InEdge and update the edge count.
+ void addInEdge(PGOUseEdge *E) {
+ InEdges.push_back(E);
+ UnknownCountInEdge++;
+ }
};
} // end anonymous namespace
@@ -853,10 +985,10 @@ public:
PGOUseFunc(Function &Func, Module *Modu,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFIin = nullptr)
+ BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
: F(Func), M(Modu), BFI(BFIin),
- FuncInfo(Func, ComdatMembers, false, BPI, BFIin),
- FreqAttr(FFA_Normal) {}
+ FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
+ FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
@@ -929,8 +1061,11 @@ private:
// Function hotness info derived from profile.
FuncFreqAttr FreqAttr;
- // Find the Instrumented BB and set the value.
- void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+ // Whether to use the context-sensitive profile.
+ bool IsCS;
+
+ // Find the Instrumented BB and set the value. Return false on error.
+ bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
// Set the edge counter value for the unknown edge -- there should be only
// one unknown edge.
@@ -959,41 +1094,64 @@ private:
} // end anonymous namespace
// Visit all the edges and assign the count value for the instrumented
-// edges and the BB.
-void PGOUseFunc::setInstrumentedCounts(
+// edges and the BB. Return false on error.
+bool PGOUseFunc::setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile) {
- assert(FuncInfo.getNumCounters() == CountFromProfile.size());
- // Use a worklist as we will update the vector during the iteration.
- std::vector<PGOUseEdge *> WorkList;
- for (auto &E : FuncInfo.MST.AllEdges)
- WorkList.push_back(E.get());
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+ // The number of counters here should match the number of counters in the
+ // profile. Return false if they mismatch.
+ if (NumCounters != CountFromProfile.size()) {
+ return false;
+ }
+ // Set the profile count to the Instrumented BBs.
uint32_t I = 0;
- for (auto &E : WorkList) {
- BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
- if (!InstrBB)
- continue;
+ for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
- if (!E->Removed) {
- getBBInfo(InstrBB).setBBInfoCount(CountValue);
- E->setEdgeCount(CountValue);
- continue;
- }
-
- // Need to add two new edges.
- BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
- BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
- // Add new edge of SrcBB->InstrBB.
- PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
- NewEdge.setEdgeCount(CountValue);
- // Add new edge of InstrBB->DestBB.
- PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
- NewEdge1.setEdgeCount(CountValue);
- NewEdge1.InMST = true;
- getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ UseBBInfo &Info = getBBInfo(InstrBB);
+ Info.setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
CountPosition = I;
+
+ // Set the edge count and update the count of unknown edges for BBs.
+ auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+ E->setEdgeCount(Value);
+ this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+ };
+
+ // Set the profile count for the instrumented edges. There are edges that
+ // are not in the MST but were not instrumented; we need to set their edge
+ // count values so that we can populate the profile counts later.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed || E->InMST)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
+ // If only one out-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+ setEdgeCount(E.get(), SrcInfo.CountValue);
+ else {
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ // If only one in-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+ setEdgeCount(E.get(), DestInfo.CountValue);
+ }
+ if (E->CountValid)
+ continue;
+ // E's count should have been set from the profile. If not, this means E
+ // skipped instrumentation; set its count to 0.
+ setEdgeCount(E.get(), 0);
+ }
+ return true;
}
// Set the count value for the unknown edge. There should be one and only one
@@ -1022,23 +1180,31 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+ << FuncInfo.FuncName << ": ");
if (Err == instrprof_error::unknown_function) {
- NumOfPGOMissing++;
+ IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
} else if (Err == instrprof_error::hash_mismatch ||
Err == instrprof_error::malformed) {
- NumOfPGOMismatch++;
+ IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
SkipWarning =
NoPGOWarnMismatch ||
(NoPGOWarnMismatchComdat &&
(F.hasComdat() ||
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
}
+ LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
if (SkipWarning)
return;
- std::string Msg = IPE.message() + std::string(" ") + F.getName().str();
+ std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
+ std::string(" Hash = ") +
+ std::to_string(FuncInfo.FunctionHash);
+
Ctx.diagnose(
DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
});
@@ -1047,7 +1213,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
ProfileRecord = std::move(Result.get());
std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
- NumOfPGOFunc++;
+ IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
@@ -1061,34 +1227,23 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
getBBInfo(nullptr).UnknownCountOutEdge = 2;
getBBInfo(nullptr).UnknownCountInEdge = 2;
- setInstrumentedCounts(CountFromProfile);
- ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+ if (!setInstrumentedCounts(CountFromProfile)) {
+ LLVM_DEBUG(
+ dbgs() << "Inconsistent number of counts, skipping this function");
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Inconsistent number of counts in ") + F.getName().str()
+ + Twine(": the profile may be stale or there is a function name collision."),
+ DS_Warning));
+ return false;
+ }
+ ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
return true;
}
// Populate the counters from instrumented BBs to all BBs.
// In the end of this operation, all BBs should have a valid count value.
void PGOUseFunc::populateCounters() {
- // First set up Count variable for all BBs.
- for (auto &E : FuncInfo.MST.AllEdges) {
- if (E->Removed)
- continue;
-
- const BasicBlock *SrcBB = E->SrcBB;
- const BasicBlock *DestBB = E->DestBB;
- UseBBInfo &SrcInfo = getBBInfo(SrcBB);
- UseBBInfo &DestInfo = getBBInfo(DestBB);
- SrcInfo.OutEdges.push_back(E.get());
- DestInfo.InEdges.push_back(E.get());
- SrcInfo.UnknownCountOutEdge++;
- DestInfo.UnknownCountInEdge++;
-
- if (!E->CountValid)
- continue;
- DestInfo.UnknownCountInEdge--;
- SrcInfo.UnknownCountOutEdge--;
- }
-
bool Changes = true;
unsigned NumPasses = 0;
while (Changes) {
@@ -1167,7 +1322,8 @@ void PGOUseFunc::populateCounters() {
// Assign the scaled count values to the BB with multiple out edges.
void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
- LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
+ LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
+ << " IsCS=" << IsCS << "\n");
for (auto &BB : F) {
Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() < 2)
@@ -1175,6 +1331,7 @@ void PGOUseFunc::setBranchWeights() {
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
isa<IndirectBrInst>(TI)))
continue;
+
if (getBBInfo(&BB).CountValue == 0)
continue;
@@ -1282,7 +1439,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
Type *Int64Ty = Builder.getInt64Ty();
Type *I8PtrTy = Builder.getInt8PtrTy();
Value *Length = MI.getLength();
- assert(!dyn_cast<ConstantInt>(Length));
+ assert(!isa<ConstantInt>(Length));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
@@ -1325,8 +1482,14 @@ void PGOUseFunc::annotateValueSites() {
annotateValueSites(Kind);
}
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
// Annotate the instructions for a specific value kind.
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+ assert(Kind <= IPVK_Last);
unsigned ValueSiteIndex = 0;
auto &ValueSites = FuncInfo.ValueSites[Kind];
unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
@@ -1334,8 +1497,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
auto &Ctx = M->getContext();
Ctx.diagnose(DiagnosticInfoPGOProfile(
M->getName().data(),
- Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) +
- " in " + F.getName().str(),
+ Twine("Inconsistent number of value sites for ") +
+ Twine(ValueProfKindDescr[Kind]) +
+ Twine(" profiling in \"") + F.getName().str() +
+ Twine("\", possibly due to the use of a stale profile."),
DS_Warning));
return;
}
@@ -1352,24 +1517,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
}
}
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-static void createIRLevelProfileFlagVariable(Module &M) {
- Type *IntTy64 = Type::getInt64Ty(M.getContext());
- uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
- auto IRLevelVersionVariable = new GlobalVariable(
- M, IntTy64, true, GlobalVariable::ExternalLinkage,
- Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
- INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
- Triple TT(M.getTargetTriple());
- if (!TT.supportsCOMDAT())
- IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
- else
- IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
- StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
-}
-
// Collect the set of members for each Comdat in module M and store
// in ComdatMembers.
static void collectComdatMembers(
@@ -1390,8 +1537,11 @@ static void collectComdatMembers(
static bool InstrumentAllFunctions(
Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
- createIRLevelProfileFlagVariable(M);
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+ // For context-sensitive instrumentation, we should have a separate pass
+ // (before LTO/ThinLTO linking) to create these variables.
+ if (!IsCS)
+ createIRLevelProfileFlagVar(M, /* IsCS */ false);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1400,11 +1550,18 @@ static bool InstrumentAllFunctions(
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
- instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
+ instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
}
return true;
}
+PreservedAnalyses
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+ createProfileFileNameVar(M, CSInstrName);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true);
+ return PreservedAnalyses::all();
+}
+
bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
@@ -1415,7 +1572,7 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
auto LookupBFI = [this](Function &F) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return InstrumentAllFunctions(M, LookupBPI, LookupBFI);
+ return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
}
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
@@ -1429,7 +1586,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI))
+ if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1438,7 +1595,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
@@ -1459,6 +1616,9 @@ static bool annotateAllFunctions(
StringRef("Cannot get PGOReader")));
return false;
}
+ if (!PGOReader->hasCSIRLevelProfile() && IsCS)
+ return false;
+
// TODO: might need to change the warning once the clang option is finalized.
if (!PGOReader->isIRLevelProfile()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
@@ -1478,7 +1638,7 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
bool AllZeros = false;
if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
@@ -1526,7 +1686,10 @@ static bool annotateAllFunctions(
}
}
}
- M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
+ M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+ IsCS ? ProfileSummary::PSK_CSInstr
+ : ProfileSummary::PSK_Instr);
+
// Set function hotness attribute from the profile.
// We have to apply these attributes at the end because their presence
// can affect the BranchProbabilityInfo of any callers, resulting in an
@@ -1545,9 +1708,10 @@ static bool annotateAllFunctions(
}
PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
- std::string RemappingFilename)
+ std::string RemappingFilename,
+ bool IsCS)
: ProfileFileName(std::move(Filename)),
- ProfileRemappingFileName(std::move(RemappingFilename)) {
+ ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
if (!PGOTestProfileRemappingFile.empty())
@@ -1567,7 +1731,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
};
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
- LookupBPI, LookupBFI))
+ LookupBPI, LookupBFI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1584,7 +1748,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
+ IsCS);
}
static std::string getSimpleNodeName(const BasicBlock *Node) {
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 2c71e75dadcc..188f95b4676b 100644
--- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -1,9 +1,8 @@
//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,12 +19,12 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Instrumentation/PoisonChecking.cpp b/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 000000000000..81d92e724c7d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,357 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit. That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef. There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language. If you're looking for something to see if your C/C++ contains
+// UB, this is not it.
+//
+// The rewritten semantics of each instruction will include the following
+// components:
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+// state of each input to whether the dynamic output of the instruction
+// produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+// instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+//    execute undefined behavior given the poison state of its inputs.
+//
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist). The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+// from the LangRef.
+//  - Validating the output of an IR fuzzer to ensure that all programs produced
+// are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+// status of an input/IR pair is the same before and after an optimization
+// transform.
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+// exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+//  - Most validation rules are not yet implemented for instructions with
+//    poison-relevant flags. At the moment, only nsw/nuw on add/sub are
+//    supported.
+// - UB which is control dependent on a branch on poison is not yet
+// reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled. As such,
+// storing poison to memory and then reloading it will cause a false negative
+// as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled. At the
+// moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled. In particular, the optimizer's freedom to pick
+// concrete values for undef bits so as to maximize potential for producing
+// poison is not modeled.
+//
+//===----------------------------------------------------------------------===//
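As a rough scalar analog of rules (2) and (3) above (an illustration only, not part of the pass): the shadow bit for an "add nsw" result is the OR of the operands' shadow bits and a dynamic signed-overflow check.

#include <cstdint>
#include <limits>

// Each value carries a dynamic "is poison" bit alongside its payload.
struct Shadow { int32_t Value; bool Poison; };

// Propagation (2): poison in either input poisons the output.
// Flag validation (3): violating nsw (signed overflow) also produces poison.
static Shadow addNSW(Shadow A, Shadow B) {
  int64_t Wide = int64_t(A.Value) + int64_t(B.Value);
  bool Overflow = Wide > std::numeric_limits<int32_t>::max() ||
                  Wide < std::numeric_limits<int32_t>::min();
  return {int32_t(Wide), A.Poison || B.Poison || Overflow};
}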
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+ cl::init(false),
+ cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+ assert(V->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(V))
+ return CI->isZero();
+ return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+ if (Ops.size() == 0)
+ return B.getFalse();
+ unsigned i = 0;
+ for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+ if (i == Ops.size())
+ return B.getFalse();
+ Value *Accum = Ops[i++];
+ for (; i < Ops.size(); i++)
+ if (!isConstantFalse(Ops[i]))
+ Accum = B.CreateOr(Accum, Ops[i]);
+ return Accum;
+}
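For illustration, buildOrChain folds away constant-false entries, so a mixed check list collapses to a single or. A hypothetical caller (CondA/CondB stand for i1 values computed elsewhere):

// Hypothetical usage; would live in the same file and reuse its includes.
static Value *anyCheckFailed(Instruction *InsertPt, Value *CondA, Value *CondB) {
  IRBuilder<> Builder(InsertPt);
  SmallVector<Value *, 4> Checks = {Builder.getFalse(), CondA,
                                    Builder.getFalse(), CondB};
  return buildOrChain(Builder, Checks); // emits just "or CondA, CondB"
}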
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+ SmallVector<Value*, 2> &Checks) {
+ assert(isa<BinaryOperator>(I));
+
+ IRBuilder<> B(&I);
+ Value *LHS = I.getOperand(0);
+ Value *RHS = I.getOperand(1);
+ switch (I.getOpcode()) {
+ default:
+ return;
+ case Instruction::Add: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Sub: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Mul: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::UDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::SDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl: {
+ Value *ShiftCheck =
+ B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+ ConstantInt::get(RHS->getType(),
+ LHS->getType()->getScalarSizeInBits()));
+ Checks.push_back(ShiftCheck);
+ break;
+ }
+ };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+ IRBuilder<> B(&I);
+ SmallVector<Value*, 2> Checks;
+ if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
+ generatePoisonChecksForBinOp(I, Checks);
+
+  // Handle non-binops separately
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::ExtractElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(1);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ case Instruction::InsertElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(2);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ };
+ return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+ auto Itr = ValToPoison.find(V);
+ if (Itr != ValToPoison.end())
+ return Itr->second;
+ if (isa<Constant>(V)) {
+ return ConstantInt::getFalse(V->getContext());
+ }
+  // Return false for unknown values - this implements a non-strict mode where
+ // unhandled IR constructs are simply considered to never produce poison. At
+ // some point in the future, we probably want a "strict mode" for testing if
+ // nothing else.
+ return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (CI->isAllOnesValue())
+ return;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ M->getOrInsertFunction("__poison_checker_assert",
+ Type::getVoidTy(M->getContext()),
+ Type::getInt1Ty(M->getContext()));
+ Function *TrapFunc = M->getFunction("__poison_checker_assert");
+ B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ CreateAssert(B, B.CreateNot(Cond));
+}
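The pass only declares __poison_checker_assert, so whoever runs the instrumented code must supply the handler. One plausible definition, assuming a false argument means a required condition did not hold:

#include <cstdio>
#include <cstdlib>

extern "C" void __poison_checker_assert(bool MustHold) {
  if (!MustHold) {
    std::fprintf(stderr, "poison-checking: poison reached a UB-triggering use\n");
    std::abort();
  }
}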
+
+static bool rewrite(Function &F) {
+ auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+ DenseMap<Value *, Value *> ValToPoison;
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ auto *NewPHI = PHINode::Create(Int1Ty,
+ OldPHI->getNumIncomingValues());
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+ NewPHI->addIncoming(UndefValue::get(Int1Ty),
+ OldPHI->getIncomingBlock(i));
+ NewPHI->insertBefore(OldPHI);
+ ValToPoison[OldPHI] = NewPHI;
+ }
+
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB) {
+ if (isa<PHINode>(I)) continue;
+
+ IRBuilder<> B(cast<Instruction>(&I));
+
+ // Note: There are many more sources of documented UB, but this pass only
+ // attempts to find UB triggered by propagation of poison.
+ if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+ if (LocalCheck)
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ if (RI->getNumOperands() != 0) {
+ Value *Op = RI->getOperand(0);
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+ }
+
+ SmallVector<Value*, 4> Checks;
+ if (propagatesFullPoison(&I))
+ for (Value *V : I.operands())
+ Checks.push_back(getPoisonFor(ValToPoison, V));
+
+ if (auto *Check = generatePoisonChecks(I))
+ Checks.push_back(Check);
+ ValToPoison[&I] = buildOrChain(B, Checks);
+ }
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ if (!ValToPoison.count(OldPHI))
+ continue; // skip the newly inserted phis
+ auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+ auto *OldVal = OldPHI->getIncomingValue(i);
+ NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+ }
+ }
+ return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &F : M)
+ Changed |= rewrite(F);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+ - Control dependent poison UB
+ - Strict mode - (i.e. must analyze every operand)
+ - Poison through memory
+ - Function ABIs
+   - Full coverage of intrinsics, etc. (ouch)
+
+ Instructions w/Unclear Semantics:
+   - shufflevector - It would seem reasonable for an out of bounds mask element
+     to produce poison, but the LangRef does not say so.
+   - and/or - It would seem reasonable for poison to propagate from both
+     arguments, but the LangRef does not say so and propagatesFullPoison does
+     not include these two.
+   - all binary ops w/vector operands - The likely interpretation would be that
+     any element overflowing should produce poison for the entire result, but
+     the LangRef does not say so.
+   - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
+     strange that only certain flags should be documented as producing poison.
+
+   Cases of clear poison semantics not yet implemented:
+   - Exact flags on ashr/lshr produce poison
+   - NSW/NUW flags on shl produce poison
+   - Inbounds flag on getelementptr produces poison
+ - fptosi/fptoui (out of bounds input) produce poison
+ - Scalable vector types for insertelement/extractelement
+ - Floating point binary ops w/fmf nnan/noinfs flags produce poison
+ */
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 0ba8d5765e8c..ca0cb4bdbe84 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -1,9 +1,8 @@
//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,7 +61,10 @@ static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
-static const char *const SanCovModuleCtorName = "sancov.module_ctor";
+static const char *const SanCovModuleCtorTracePcGuardName =
+ "sancov.module_ctor_trace_pc_guard";
+static const char *const SanCovModuleCtor8bitCountersName =
+ "sancov.module_ctor_8bit_counters";
static const uint64_t SanCtorAndDtorPriority = 2;
static const char *const SanCovTracePCGuardName =
@@ -210,8 +212,9 @@ private:
void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
bool IsLeafFunc = true);
- Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName,
- Type *Ty, const char *Section);
+ Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+ const char *InitFunctionName, Type *Ty,
+ const char *Section);
std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
Type *Ty);
@@ -223,13 +226,13 @@ private:
std::string getSectionName(const std::string &Section) const;
std::string getSectionStart(const std::string &Section) const;
std::string getSectionEnd(const std::string &Section) const;
- Function *SanCovTracePCIndir;
- Function *SanCovTracePC, *SanCovTracePCGuard;
- Function *SanCovTraceCmpFunction[4];
- Function *SanCovTraceConstCmpFunction[4];
- Function *SanCovTraceDivFunction[2];
- Function *SanCovTraceGepFunction;
- Function *SanCovTraceSwitchFunction;
+ FunctionCallee SanCovTracePCIndir;
+ FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+ FunctionCallee SanCovTraceCmpFunction[4];
+ FunctionCallee SanCovTraceConstCmpFunction[4];
+ FunctionCallee SanCovTraceDivFunction[2];
+ FunctionCallee SanCovTraceGepFunction;
+ FunctionCallee SanCovTraceSwitchFunction;
GlobalVariable *SanCovLowestStack;
InlineAsm *EmptyAsm;
Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
@@ -270,24 +273,25 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
// Account for the fact that on windows-msvc __start_* symbols actually
// point to a uint64_t before the start of the array.
auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
- auto GEP = IRB.CreateGEP(SecStartI8Ptr,
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
ConstantInt::get(IntptrTy, sizeof(uint64_t)));
return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
}
Function *SanitizerCoverageModule::CreateInitCallsForSections(
- Module &M, const char *InitFunctionName, Type *Ty,
+ Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
const char *Section) {
auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
auto SecStart = SecStartEnd.first;
auto SecEnd = SecStartEnd.second;
Function *CtorFunc;
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ assert(CtorFunc->getName() == CtorName);
if (TargetTriple.supportsCOMDAT()) {
// Use comdat to dedup CtorFunc.
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
} else {
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
@@ -329,77 +333,74 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Int16Ty = IRB.getInt16Ty();
Int8Ty = IRB.getInt8Ty();
- SanCovTracePCIndir = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
+ SanCovTracePCIndir =
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+ // Make sure smaller parameters are zero-extended to i64 as required by the
+ // x86_64 ABI.
+ AttributeList SanCovTraceCmpZeroExtAL;
+ if (TargetTriple.getArch() == Triple::x86_64) {
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+ }
+
SanCovTraceCmpFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
- SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(),
- IRB.getInt16Ty()));
- SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(),
- IRB.getInt32Ty()));
+ M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt8Ty(), IRB.getInt8Ty());
+ SanCovTraceCmpFunction[1] =
+ M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt16Ty(), IRB.getInt16Ty());
+ SanCovTraceCmpFunction[2] =
+ M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt32Ty(), IRB.getInt32Ty());
SanCovTraceCmpFunction[3] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty));
-
- SanCovTraceConstCmpFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp1, VoidTy, Int8Ty, Int8Ty));
- SanCovTraceConstCmpFunction[1] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp2, VoidTy, Int16Ty, Int16Ty));
- SanCovTraceConstCmpFunction[2] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp4, VoidTy, Int32Ty, Int32Ty));
+ M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+ SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+ SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+ SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+ SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+ SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
SanCovTraceConstCmpFunction[3] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty));
-
- SanCovTraceDivFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv4, VoidTy, IRB.getInt32Ty()));
+ M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ {
+ AttributeList AL;
+ if (TargetTriple.getArch() == Triple::x86_64)
+ AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceDivFunction[0] =
+ M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+ }
SanCovTraceDivFunction[1] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv8, VoidTy, Int64Ty));
+ M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
SanCovTraceGepFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceGep, VoidTy, IntptrTy));
+ M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
SanCovTraceSwitchFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy));
+ M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
Constant *SanCovLowestStackConstant =
M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
- SanCovLowestStack = cast<GlobalVariable>(SanCovLowestStackConstant);
+ SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+ if (!SanCovLowestStack) {
+ C->emitError(StringRef("'") + SanCovLowestStackName +
+ "' should not be declared by the user");
+ return true;
+ }
SanCovLowestStack->setThreadLocalMode(
GlobalValue::ThreadLocalMode::InitialExecTLSModel);
if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
- // Make sure smaller parameters are zero-extended to i64 as required by the
- // x86_64 ABI.
- if (TargetTriple.getArch() == Triple::x86_64) {
- for (int i = 0; i < 3; i++) {
- SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
- SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
- SanCovTraceConstCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
- SanCovTraceConstCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
- }
- SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt);
- }
-
-
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- SanCovTracePC = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCName, VoidTy));
- SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTracePCGuardName, VoidTy, Int32PtrTy));
+ SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+ SanCovTracePCGuard =
+ M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
for (auto &F : M)
runOnFunction(F);
@@ -407,14 +408,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Function *Ctor = nullptr;
if (FunctionGuardArray)
- Ctor = CreateInitCallsForSections(M, SanCovTracePCGuardInitName, Int32PtrTy,
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+ SanCovTracePCGuardInitName, Int32PtrTy,
SanCovGuardsSectionName);
if (Function8bitCounterArray)
- Ctor = CreateInitCallsForSections(M, SanCov8bitCountersInitName, Int8PtrTy,
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+ SanCov8bitCountersInitName, Int8PtrTy,
SanCovCountersSectionName);
if (Ctor && Options.PCTable) {
auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
- Function *InitFunction = declareSanitizerInitFunction(
+ FunctionCallee InitFunction = declareSanitizerInitFunction(
M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
@@ -458,12 +461,12 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
const DominatorTree *DT,
const PostDominatorTree *PDT,
const SanitizerCoverageOptions &Options) {
- // Don't insert coverage for unreachable blocks: we will never call
- // __sanitizer_cov() for them, so counting them in
+ // Don't insert coverage for blocks containing nothing but unreachable: we
+ // will never call __sanitizer_cov() for them, so counting them in
// NumberOfInstrumentedBlocks() might complicate calculation of code coverage
// percentage. Also, unreachable instructions frequently have no debug
// locations.
- if (isa<UnreachableInst>(BB->getTerminator()))
+ if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime()))
return false;
// Don't insert coverage into blocks without a valid insertion point
@@ -484,6 +487,37 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
&& !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
}
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+// * To dominates From or
+// * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+ const DominatorTree *DT) {
+ if (DT->dominates(To, From))
+ return true;
+ if (auto Next = To->getUniqueSuccessor())
+ if (DT->dominates(Next, From))
+ return true;
+ return false;
+}
+
+// Prunes uninteresting Cmp instrumentation:
+// * CMP instructions that feed into a loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+ const SanitizerCoverageOptions &Options) {
+ if (!Options.NoPrune)
+ if (CMP->hasOneUse())
+ if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+ for (BasicBlock *B : BR->successors())
+ if (IsBackEdge(BR->getParent(), B, DT))
+ return false;
+ return true;
+}
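For illustration (example code, not from this patch): the compare this prunes is the classic loop-latch test, whose only use is the branch that forms the backedge.

// "I != N" has a single use, the latch branch; one successor is the loop
// header, which dominates the latch, so IsBackEdge() holds and the compare
// is not passed to __sanitizer_cov_trace_cmp*.
void copyInts(int *Dst, const int *Src, int N) {
  for (int I = 0; I != N; ++I)
    Dst[I] = Src[I];
}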
+
bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty())
return false;
@@ -508,7 +542,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
return false;
if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
- SplitAllCriticalEdges(F);
+    SplitAllCriticalEdges(
+        F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
SmallVector<Instruction *, 8> IndirCalls;
SmallVector<BasicBlock *, 16> BlocksToInstrument;
SmallVector<Instruction *, 8> CmpTraceTargets;
@@ -532,8 +566,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
IndirCalls.push_back(&Inst);
}
if (Options.TraceCmp) {
- if (isa<ICmpInst>(&Inst))
- CmpTraceTargets.push_back(&Inst);
+ if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+ if (IsInterestingCmp(CMP, DT, Options))
+ CmpTraceTargets.push_back(&Inst);
if (isa<SwitchInst>(&Inst))
SwitchTraceTargets.push_back(&Inst);
}
@@ -797,9 +832,9 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
if (Options.Inline8bitCounters) {
auto CounterPtr = IRB.CreateGEP(
- Function8bitCounterArray,
+ Function8bitCounterArray->getValueType(), Function8bitCounterArray,
{ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
- auto Load = IRB.CreateLoad(CounterPtr);
+ auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
auto Store = IRB.CreateStore(Inc, CounterPtr);
SetNoSanitizeMetadata(Load);
@@ -812,7 +847,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
auto FrameAddrPtr =
IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
- auto LowestStack = IRB.CreateLoad(SanCovLowestStack);
+ auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
IRBuilder<> ThenIRB(ThenTerm);
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 077364e15c4f..5be13fa745cb 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -1,9 +1,8 @@
//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,25 +110,26 @@ private:
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
- Function *TsanFuncEntry;
- Function *TsanFuncExit;
- Function *TsanIgnoreBegin;
- Function *TsanIgnoreEnd;
+ FunctionCallee TsanFuncEntry;
+ FunctionCallee TsanFuncExit;
+ FunctionCallee TsanIgnoreBegin;
+ FunctionCallee TsanIgnoreEnd;
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
- Function *TsanRead[kNumberOfAccessSizes];
- Function *TsanWrite[kNumberOfAccessSizes];
- Function *TsanUnalignedRead[kNumberOfAccessSizes];
- Function *TsanUnalignedWrite[kNumberOfAccessSizes];
- Function *TsanAtomicLoad[kNumberOfAccessSizes];
- Function *TsanAtomicStore[kNumberOfAccessSizes];
- Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
- Function *TsanAtomicCAS[kNumberOfAccessSizes];
- Function *TsanAtomicThreadFence;
- Function *TsanAtomicSignalFence;
- Function *TsanVptrUpdate;
- Function *TsanVptrLoad;
- Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+ FunctionCallee TsanRead[kNumberOfAccessSizes];
+ FunctionCallee TsanWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
+ [kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicThreadFence;
+ FunctionCallee TsanAtomicSignalFence;
+ FunctionCallee TsanVptrUpdate;
+ FunctionCallee TsanVptrLoad;
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
Function *TsanCtorFunction;
};
@@ -189,14 +189,14 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
// Initialize the callbacks.
- TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- TsanFuncExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy()));
- TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy()));
- TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_end", Attr, IRB.getVoidTy()));
+ TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanFuncExit =
+ M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
+ TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
+ IRB.getVoidTy());
+ TsanIgnoreEnd =
+ M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const unsigned ByteSize = 1U << i;
@@ -204,32 +204,30 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
std::string ByteSizeStr = utostr(ByteSize);
std::string BitSizeStr = utostr(BitSize);
SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
- TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
- TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
- TsanUnalignedRead[i] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanUnalignedRead[i] = M.getOrInsertFunction(
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
- TsanUnalignedWrite[i] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanUnalignedWrite[i] = M.getOrInsertFunction(
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
- TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy));
+ TsanAtomicLoad[i] =
+ M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy);
SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
- TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy));
+ TsanAtomicStore[i] = M.getOrInsertFunction(
+ AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy);
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -252,34 +250,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
- TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy));
+ TsanAtomicRMW[op][i] =
+ M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy);
}
SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
- TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy));
+ TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty,
+ Ty, OrdTy, OrdTy);
}
- TsanVptrUpdate = checkSanitizerInterfaceFunction(
+ TsanVptrUpdate =
M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy()));
- TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy));
- TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy));
-
- MemmoveFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemcpyFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemsetFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+ TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+ TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+
+ MemmoveFn =
+ M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemcpyFn =
+ M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemsetFn =
+ M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
}
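The edits in this file are all instances of the same FunctionCallee migration. A minimal sketch of the pattern (helper name assumed; the callback name is taken from the code above):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// getOrInsertFunction now returns a FunctionCallee (function type plus callee
// value), which IRBuilder::CreateCall accepts directly, so the result no
// longer needs to be cast to Function*.
static void emitTsanRead4(Module &M, IRBuilder<> &IRB, Value *Addr) {
  FunctionCallee Callback = M.getOrInsertFunction(
      "__tsan_read4", IRB.getVoidTy(), IRB.getInt8PtrTy());
  Value *Ptr = IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy());
  IRB.CreateCall(Callback, {Ptr});
}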
ThreadSanitizer::ThreadSanitizer(Module &M) {
@@ -291,7 +289,9 @@ ThreadSanitizer::ThreadSanitizer(Module &M) {
/*InitArgs=*/{},
// This callback is invoked when the functions are created the first
// time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, Function *) { appendToGlobalCtors(M, Ctor, 0); });
+ [&](Function *Ctor, FunctionCallee) {
+ appendToGlobalCtors(M, Ctor, 0);
+ });
}
static bool isVtableAccess(Instruction *I) {
@@ -559,7 +559,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
: cast<LoadInst>(I)->getAlignment();
Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
- Value *OnAccessFunc = nullptr;
+ FunctionCallee OnAccessFunc = nullptr;
if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
else
@@ -659,7 +659,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+ FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx];
if (!F)
return false;
const unsigned ByteSize = 1U << Idx;
@@ -706,8 +706,9 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
I->eraseFromParent();
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
- Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ?
- TsanAtomicSignalFence : TsanAtomicThreadFence;
+ FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
+ ? TsanAtomicSignalFence
+ : TsanAtomicThreadFence;
CallInst *C = CallInst::Create(F, Args);
ReplaceInstWithInst(I, C);
}
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 7f6b157304a3..e1e95cd6a407 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -1,9 +1,8 @@
//===- ARCRuntimeEntryPoints.h - ObjC ARC Optimization ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,7 +33,7 @@
namespace llvm {
-class Constant;
+class Function;
class LLVMContext;
namespace objcarc {
@@ -70,7 +69,7 @@ public:
RetainAutoreleaseRV = nullptr;
}
- Constant *get(ARCRuntimeEntryPointKind kind) {
+ Function *get(ARCRuntimeEntryPointKind kind) {
assert(TheModule != nullptr && "Not initialized.");
switch (kind) {
@@ -106,33 +105,33 @@ private:
Module *TheModule = nullptr;
/// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
- Constant *AutoreleaseRV = nullptr;
+ Function *AutoreleaseRV = nullptr;
/// Declaration for ObjC runtime function objc_release.
- Constant *Release = nullptr;
+ Function *Release = nullptr;
/// Declaration for ObjC runtime function objc_retain.
- Constant *Retain = nullptr;
+ Function *Retain = nullptr;
/// Declaration for ObjC runtime function objc_retainBlock.
- Constant *RetainBlock = nullptr;
+ Function *RetainBlock = nullptr;
/// Declaration for ObjC runtime function objc_autorelease.
- Constant *Autorelease = nullptr;
+ Function *Autorelease = nullptr;
/// Declaration for objc_storeStrong().
- Constant *StoreStrong = nullptr;
+ Function *StoreStrong = nullptr;
/// Declaration for objc_retainAutoreleasedReturnValue().
- Constant *RetainRV = nullptr;
+ Function *RetainRV = nullptr;
/// Declaration for objc_retainAutorelease().
- Constant *RetainAutorelease = nullptr;
+ Function *RetainAutorelease = nullptr;
/// Declaration for objc_retainAutoreleaseReturnValue().
- Constant *RetainAutoreleaseRV = nullptr;
+ Function *RetainAutoreleaseRV = nullptr;
- Constant *getIntrinsicEntryPoint(Constant *&Decl, Intrinsic::ID IntID) {
+ Function *getIntrinsicEntryPoint(Function *&Decl, Intrinsic::ID IntID) {
if (Decl)
return Decl;
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
index 9ade14c1177a..2fa07cfb32c0 100644
--- a/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -1,9 +1,8 @@
//===- BlotMapVector.h - A MapVector with the blot operation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4bd5fd1acd4c..e8f8fb6f3a7c 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -1,9 +1,8 @@
//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 0f13b02c806f..ed89c8c8fc89 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -1,9 +1,8 @@
//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- C++ -*-----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index c30aaebd0f4d..f4da51650a7d 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -1,9 +1,8 @@
//===-- ObjCARC.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 751c8f30e814..d465630800b9 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -1,9 +1,8 @@
//===- ObjCARC.h - ObjC ARC Optimization --------------*- C++ -*-----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 8d3ef8fde534..b341dd807508 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index abe2871c0b8f..36aa513ec554 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -46,6 +45,10 @@ using namespace llvm::objcarc;
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+static cl::opt<unsigned> MaxBBSize("arc-contract-max-bb-size", cl::Hidden,
+ cl::desc("Maximum basic block size to discover the dominance relation of "
+ "two instructions in the same basic block"), cl::init(65535));
+
//===----------------------------------------------------------------------===//
// Declarations
//===----------------------------------------------------------------------===//
@@ -140,7 +143,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
LLVM_DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -189,7 +192,7 @@ bool ObjCARCContract::contractAutorelease(
" Retain: "
<< *Retain << "\n");
- Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
+ Function *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
: ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
@@ -314,8 +317,8 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
/// Create a call instruction with the correct funclet token. Should be used
/// instead of calling CallInst::Create directly.
static CallInst *
-createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
- Instruction *InsertBefore,
+createCallInst(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore,
const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
SmallVector<OperandBundleDef, 1> OpBundles;
if (!BlockColors.empty()) {
@@ -326,7 +329,15 @@ createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
OpBundles.emplace_back("funclet", EHPad);
}
- return CallInst::Create(Func, Args, OpBundles, NameStr, InsertBefore);
+ return CallInst::Create(FTy, Func, Args, OpBundles, NameStr, InsertBefore);
+}
+
+static CallInst *
+createCallInst(FunctionCallee Func, ArrayRef<Value *> Args, const Twine &NameStr,
+ Instruction *InsertBefore,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+ return createCallInst(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+ InsertBefore, BlockColors);
}
/// Attempt to merge an objc_release with a store, load, and objc_retain to form
@@ -409,7 +420,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = createCallInst(Decl, Args, "", Store, BlockColors);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -432,102 +443,100 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
}
bool ObjCARCContract::tryToPeepholeInstruction(
- Function &F, Instruction *Inst, inst_iterator &Iter,
- SmallPtrSetImpl<Instruction *> &DependingInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- bool &TailOkForStoreStrongs,
- const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
- // Only these library routines return their argument. In particular,
- // objc_retainBlock does not necessarily return its argument.
+ Function &F, Instruction *Inst, inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited, bool &TailOkForStoreStrongs,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
ARCInstKind Class = GetBasicARCInstKind(Inst);
- switch (Class) {
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
+ switch (Class) {
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return false;
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+ case ARCInstKind::Retain:
+ // Attempt to convert retains to retainrvs if they are next to function
+ // calls.
+ if (!optimizeRetainCall(F, Inst))
return false;
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
- case ARCInstKind::Retain:
- // Attempt to convert retains to retainrvs if they are next to function
- // calls.
- if (!optimizeRetainCall(F, Inst))
- return false;
- // If we succeed in our optimization, fall through.
- LLVM_FALLTHROUGH;
- case ARCInstKind::RetainRV:
- case ARCInstKind::ClaimRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the return value optimization, insert it now.
- if (!RVInstMarker)
- return false;
- BasicBlock::iterator BBI = Inst->getIterator();
- BasicBlock *InstParent = Inst->getParent();
-
- // Step up to see if the call immediately precedes the RV call.
- // If it's an invoke, we have to cross a block boundary. And we have
- // to carefully dodge no-op instructions.
- do {
- if (BBI == InstParent->begin()) {
- BasicBlock *Pred = InstParent->getSinglePredecessor();
- if (!Pred)
- goto decline_rv_optimization;
- BBI = Pred->getTerminator()->getIterator();
- break;
- }
- --BBI;
- } while (IsNoopInstruction(&*BBI));
-
- if (&*BBI == GetArgRCIdentityRoot(Inst)) {
- LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
- "optimization.\n");
- Changed = true;
- InlineAsm *IA = InlineAsm::get(
- FunctionType::get(Type::getVoidTy(Inst->getContext()),
- /*isVarArg=*/false),
- RVInstMarker->getString(),
- /*Constraints=*/"", /*hasSideEffects=*/true);
-
- createCallInst(IA, None, "", Inst, BlockColors);
- }
- decline_rv_optimization:
+ // If we succeed in our optimization, fall through.
+ LLVM_FALLTHROUGH;
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::ClaimRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the return value optimization, insert it now.
+ if (!RVInstMarker)
return false;
- }
- case ARCInstKind::InitWeak: {
- // objc_initWeak(p, null) => *p = null
- CallInst *CI = cast<CallInst>(Inst);
- if (IsNullOrUndef(CI->getArgOperand(1))) {
- Value *Null =
- ConstantPointerNull::get(cast<PointerType>(CI->getType()));
- Changed = true;
- new StoreInst(Null, CI->getArgOperand(0), CI);
-
- LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
- << " New = " << *Null << "\n");
-
- CI->replaceAllUsesWith(Null);
- CI->eraseFromParent();
+ BasicBlock::iterator BBI = Inst->getIterator();
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator()->getIterator();
+ break;
}
- return true;
+ --BBI;
+ } while (IsNoopInstruction(&*BBI));
+
+ if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+ LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
+ "optimization.\n");
+ Changed = true;
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RVInstMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+
+ createCallInst(IA, None, "", Inst, BlockColors);
}
- case ARCInstKind::Release:
- // Try to form an objc store strong from our release. If we fail, there is
- // nothing further to do below, so continue.
- tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
- return true;
- case ARCInstKind::User:
- // Be conservative if the function has any alloca instructions.
- // Technically we only care about escaping alloca instructions,
- // but this is sufficient to handle some interesting cases.
- if (isa<AllocaInst>(Inst))
- TailOkForStoreStrongs = false;
- return true;
- case ARCInstKind::IntrinsicUser:
- // Remove calls to @llvm.objc.clang.arc.use(...).
- Inst->eraseFromParent();
- return true;
- default:
- return true;
+ decline_rv_optimization:
+ return false;
+ }
+ case ARCInstKind::InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+
+ LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+ << " New = " << *Null << "\n");
+
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
}
+ return true;
+ }
+ case ARCInstKind::Release:
+ // Try to form an objc store strong from our release. If we fail, there is
+ // nothing further to do below, so continue.
+ tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
+ return true;
+ case ARCInstKind::User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ return true;
+ case ARCInstKind::IntrinsicUser:
+ // Remove calls to @llvm.objc.clang.arc.use(...).
+ Inst->eraseFromParent();
+ return true;
+ default:
+ return true;
+ }
}
//===----------------------------------------------------------------------===//
@@ -568,6 +577,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// reduces register pressure.
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Cache the basic block size.
+ DenseMap<const BasicBlock *, unsigned> BBSizeMap;
+
+ // A lambda that lazily computes the size of a basic block and determines
+ // whether the size exceeds MaxBBSize.
+ auto IsLargeBB = [&](const BasicBlock *BB) {
+ unsigned BBSize;
+ auto I = BBSizeMap.find(BB);
+
+ if (I != BBSizeMap.end())
+ BBSize = I->second;
+ else
+ BBSize = BBSizeMap[BB] = BB->size();
+
+ return BBSize > MaxBBSize;
+ };
+
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
Instruction *Inst = &*I++;
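
The cached-size helper above exists because BasicBlock::size() walks the whole instruction list, and the guard added in the next hunk may consult the size of the same block once per use of the argument. A minimal standalone sketch of the same memoization, assuming a caller-supplied Threshold (the pass itself compares against MaxBBSize, presumably a cl::opt defined earlier in this file):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

// Returns true when BB has more than Threshold instructions, computing the
// size at most once per block and reusing the cached value afterwards.
static bool isLargeBlock(const BasicBlock *BB, unsigned Threshold,
                         DenseMap<const BasicBlock *, unsigned> &SizeCache) {
  auto It = SizeCache.find(BB);
  unsigned Size = (It != SizeCache.end()) ? It->second
                                          : (SizeCache[BB] = BB->size());
  return Size > Threshold;
}
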
@@ -585,7 +612,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// and such; to do the replacement, the argument must have type i8*.
// Function for replacing uses of Arg dominated by Inst.
- auto ReplaceArgUses = [Inst, this](Value *Arg) {
+ auto ReplaceArgUses = [Inst, IsLargeBB, this](Value *Arg) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
return;
@@ -597,6 +624,17 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = *UI++;
unsigned OperandNo = U.getOperandNo();
+ // Don't replace the uses if Inst and the user belong to the same basic
+ // block and the size of the basic block is large. We don't want to call
+ // DominatorTree::dominates in that case. We can remove this check if we
+ // can use OrderedBasicBlock to compute the dominance relation between
+ // two instructions, but that's not currently possible since it doesn't
+ // recompute the instruction ordering when new instructions are inserted
+ // into the basic block.
+ if (Inst->getParent() == cast<Instruction>(U.getUser())->getParent() &&
+ IsLargeBB(Inst->getParent()))
+ continue;
+
// If the call's return value dominates a use of the call's argument
// value, rewrite the use to use the return value. We check for
// reachability here because an unreachable call is considered to
@@ -737,15 +775,8 @@ bool ObjCARCContract::doInitialization(Module &M) {
EP.init(&M);
// Initialize RVInstMarker.
- RVInstMarker = nullptr;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RVInstMarker = S;
- }
+ const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+ RVInstMarker = dyn_cast_or_null<MDString>(M.getModuleFlag(MarkerKey));
return false;
}
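
The doInitialization() change reads the autoreleased-return-value marker through the module-flag interface instead of hand-walking the named metadata node and its operands; Module::getModuleFlag() returns the flag's Metadata or null, so one dyn_cast_or_null covers all the old checks. A minimal sketch of the new lookup, wrapped in a free helper purely for illustration (readRVMarker is not part of the patch):

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Returns the marker string if the module carries the flag, null otherwise.
static const MDString *readRVMarker(const Module &M) {
  return dyn_cast_or_null<MDString>(
      M.getModuleFlag("clang.arc.retainAutoreleasedReturnValueMarker"));
}
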
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 6a345ef56e1b..04e98d8f5577 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9a02174556fc..6653ff0bb91a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,11 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-opts"
+static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
+ cl::Hidden,
+ cl::desc("Maximum number of ptr states the optimizer keeps track of"),
+ cl::init(4095));
+
/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{
@@ -220,6 +224,10 @@ namespace {
return !PerPtrTopDown.empty();
}
+ unsigned top_down_ptr_list_size() const {
+ return std::distance(top_down_ptr_begin(), top_down_ptr_end());
+ }
+
using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
using const_bottom_up_ptr_iterator =
decltype(PerPtrBottomUp)::const_iterator;
@@ -238,6 +246,10 @@ namespace {
return !PerPtrBottomUp.empty();
}
+ unsigned bottom_up_ptr_list_size() const {
+ return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
+ }
+
/// Mark this block as being an entry block, which has one path from the
/// entry by definition.
void SetAsEntry() { TopDownPathCount = 1; }
@@ -481,6 +493,10 @@ namespace {
/// A flag indicating whether this optimization pass should run.
bool Run;
+ /// A flag indicating whether the optimization that removes or moves
+ /// retain/release pairs should be performed.
+ bool DisableRetainReleasePairing = false;
+
/// Flags which determine whether each of the interesting runtime functions
/// is in fact used in the current function.
unsigned UsedInThisFunction;
@@ -642,7 +658,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"Old = "
<< *RetainRV << "\n");
- Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -691,7 +707,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
<< *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
+ Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = ARCInstKind::Autorelease;
@@ -744,6 +760,19 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
+ // Some of the ARC calls can be deleted if their arguments are global
+ // variables that are inert in ARC.
+ if (IsNoopOnGlobal(Class)) {
+ Value *Opnd = Inst->getOperand(0);
+ if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
+ if (GV->hasAttribute("objc_arc_inert")) {
+ if (!Inst->getType()->isVoidTy())
+ Inst->replaceAllUsesWith(Opnd);
+ Inst->eraseFromParent();
+ continue;
+ }
+ }
+
switch (Class) {
default: break;
@@ -830,7 +859,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
Call);
NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
@@ -849,7 +878,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// For functions which can never be passed stack arguments, add
// a tail keyword.
- if (IsAlwaysTail(Class)) {
+ if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
Changed = true;
LLVM_DEBUG(
dbgs() << "Adding tail keyword to function since it can never be "
@@ -1273,6 +1302,13 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+
+ // Bail out if the number of pointers being tracked becomes too large so
+ // that this pass can complete in a reasonable amount of time.
+ if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
+ DisableRetainReleasePairing = true;
+ return false;
+ }
}
// If there's a predecessor with an invoke, visit the invoke as if it were
@@ -1395,6 +1431,13 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");
NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
+
+ // Bail out if the number of pointers being tracked becomes too large so
+ // that this pass can complete in a reasonable amount of time.
+ if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
+ DisableRetainReleasePairing = true;
+ return false;
+ }
}
LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
@@ -1501,13 +1544,19 @@ bool ObjCARCOpt::Visit(Function &F,
// Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
- for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder))
+ for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+ if (DisableRetainReleasePairing)
+ return false;
+ }
// Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- for (BasicBlock *BB : llvm::reverse(PostOrder))
+ for (BasicBlock *BB : llvm::reverse(PostOrder)) {
TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+ if (DisableRetainReleasePairing)
+ return false;
+ }
return TopDownNestingDetected && BottomUpNestingDetected;
}
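
Both dataflow walks now give up as soon as a single block's state map tracks more pointers than the -arc-opt-max-ptr-states cap; DisableRetainReleasePairing is sticky, so Visit() and, later, OptimizeSequences() return early rather than spending unbounded time on pathological functions. A rough, self-contained sketch of that bail-out shape (only the 4095 default is taken from the patch; the rest is illustrative):

#include <cstddef>
#include <vector>

// Each visited block contributes some per-pointer state; once one block
// tracks more states than the budget, the analysis marks itself disabled
// and the caller skips the transform entirely.
struct BoundedVisitor {
  std::size_t MaxPtrStates = 4095;
  bool Disabled = false;

  // Returns false as soon as any block exceeds the budget.
  bool visitAll(const std::vector<std::size_t> &PtrStatesPerBlock) {
    for (std::size_t NumStates : PtrStatesPerBlock) {
      if (NumStates > MaxPtrStates) {
        Disabled = true; // sticky, like DisableRetainReleasePairing
        return false;
      }
    }
    return true;
  }
};
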
@@ -1528,7 +1577,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -1541,7 +1590,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
@@ -1877,7 +1926,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -1906,7 +1955,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2003,6 +2052,9 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
// Analyze the CFG of the function, and all instructions.
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+ if (DisableRetainReleasePairing)
+ return false;
+
// Transform.
bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
Releases,
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 3004fffb9745..c6138edba95a 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 1276f564a022..8fd842fd42d6 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysis.h - ObjC ARC Optimization -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 870a5f600fd8..b768f7973b87 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysisEvaluator.cpp - ObjC ARC Optimization ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index 8a7b6a74fae2..3243481dee0d 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -1,9 +1,8 @@
//===- PtrState.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h
index f5b9b853d8e3..66614c06cb79 100644
--- a/lib/Transforms/ObjCARC/PtrState.h
+++ b/lib/Transforms/ObjCARC/PtrState.h
@@ -1,9 +1,8 @@
//===- PtrState.h - ARC State for a Ptr -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index b0602d96798c..7f7460c5746a 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -1,9 +1,8 @@
//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,9 +19,11 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/PostDominators.h"
@@ -30,7 +31,6 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -136,7 +136,7 @@ class AggressiveDeadCodeElimination {
SmallPtrSet<const Metadata *, 32> AliveScopes;
/// Set of blocks not known to have live terminators.
- SmallPtrSet<BasicBlock *, 16> BlocksWithDeadTerminators;
+ SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
/// The set of blocks which we have determined whose control
/// dependence sources must be live and which have not had
@@ -390,7 +390,7 @@ void AggressiveDeadCodeElimination::markLive(Instruction *I) {
// Mark the containing block live
auto &BBInfo = *Info.Block;
if (BBInfo.Terminator == I) {
- BlocksWithDeadTerminators.erase(BBInfo.BB);
+ BlocksWithDeadTerminators.remove(BBInfo.BB);
// For live terminators, mark destination blocks
// live to preserve these control flow edges.
if (!BBInfo.UnconditionalBranch)
@@ -479,10 +479,14 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
// which currently have dead terminators that are control
// dependence sources of a block which is in NewLiveBlocks.
+ const SmallPtrSet<BasicBlock *, 16> BWDT{
+ BlocksWithDeadTerminators.begin(),
+ BlocksWithDeadTerminators.end()
+ };
SmallVector<BasicBlock *, 32> IDFBlocks;
ReverseIDFCalculator IDFs(PDT);
IDFs.setDefiningBlocks(NewLiveBlocks);
- IDFs.setLiveInBlocks(BlocksWithDeadTerminators);
+ IDFs.setLiveInBlocks(BWDT);
IDFs.calculate(IDFBlocks);
NewLiveBlocks.clear();
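
Storing BlocksWithDeadTerminators in a SmallSetVector ties its iteration order to insertion order rather than to pointer values, which keeps the pass's behaviour deterministic across runs; the IDF calculator still expects a SmallPtrSet, hence the temporary BWDT copy above. A tiny sketch of the container's behaviour, assuming only LLVM's ADT and Support headers:

#include "llvm/ADT/SetVector.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  SmallSetVector<int, 8> SV;
  for (int V : {30, 10, 20, 10}) // the duplicate 10 is ignored
    SV.insert(V);
  SV.remove(10);                 // analogous to remove() in markLive()
  for (int V : SV)               // always prints "30 20", in insertion order
    outs() << V << ' ';
  outs() << '\n';
  return 0;
}
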
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 0830ff5dd042..de9a62e88c27 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -1,10 +1,9 @@
//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
// Set Load/Store Alignments From Assumptions
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index d3c9b9a270aa..9bd387c33e80 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -1,9 +1,8 @@
//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,14 +41,17 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
"Trivializing a non-integer value?");
// Initialize the worklist with eligible direct users.
+ SmallPtrSet<Instruction *, 16> Visited;
SmallVector<Instruction *, 16> WorkList;
for (User *JU : I->users()) {
// If all bits of a user are demanded, then we know that nothing below that
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnesValue())
+ !DB.getDemandedBits(J).isAllOnesValue()) {
+ Visited.insert(J);
WorkList.push_back(J);
+ }
// Note that we need to check for non-int types above before asking for
// demanded bits. Normally, the only way to reach an instruction with an
@@ -62,7 +64,6 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
}
// DFS through subsequent users while tracking visits to avoid cycles.
- SmallPtrSet<Instruction *, 16> Visited;
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
@@ -73,13 +74,11 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// 1. llvm.assume demands its operand, so trivializing can't change it.
// 2. range metadata only applies to memory accesses which demand all bits.
- Visited.insert(J);
-
for (User *KU : J->users()) {
// If all bits of a user are demanded, then we know that nothing below
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
- if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
+ if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(K).isAllOnesValue())
WorkList.push_back(K);
}
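
The BDCE change folds the Visited bookkeeping into the push itself: Visited.insert(K).second is true only on the first insertion, so a user reachable through several operands can no longer be queued twice before its first visit. A generic sketch of that worklist discipline, independent of BDCE:

#include <unordered_set>
#include <vector>

// Generic DFS worklist where nodes are marked visited at push time, so a
// node reachable through several edges is queued at most once.
template <typename Node, typename SuccFn, typename VisitFn>
void forEachReachable(Node Start, SuccFn Succs, VisitFn Visit) {
  std::unordered_set<Node> Visited{Start};
  std::vector<Node> WorkList{Start};
  while (!WorkList.empty()) {
    Node N = WorkList.back();
    WorkList.pop_back();
    Visit(N);
    for (Node S : Succs(N))
      if (Visited.insert(S).second) // true only on the first insertion
        WorkList.push_back(S);
  }
}
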
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index a806d6faed60..3519b000a33f 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -1,9 +1,8 @@
//===- CallSiteSplitting.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -184,6 +183,9 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
}
static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
+ if (CS.isConvergent() || CS.cannotDuplicate())
+ return false;
+
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
// without too much effort.
Instruction *Instr = CS.getInstruction();
@@ -367,7 +369,7 @@ static void splitCallSite(
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
for (unsigned i = 0; i < Splits.size(); i++) {
Splits[i]->getTerminator()->eraseFromParent();
- DTU.deleteEdge(Splits[i], TailBB);
+ DTU.applyUpdatesPermissive({{DominatorTree::Delete, Splits[i], TailBB}});
}
// Erase the tail block once done with musttail patching
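
Routing the CFG change through applyUpdatesPermissive() hands the lazy DomTreeUpdater a batch of updates and, as I read the interface, lets it discard duplicates or updates that no longer match the CFG, which suits splitCallSite() after it has already erased the terminators. A minimal sketch of the calling pattern (DT and the three blocks stand in for values from the surrounding pass):

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Queue both edge deletions and let the lazy updater apply them when the
// dominator tree is next queried.
static void dropEdgesToTail(DominatorTree &DT, BasicBlock *PredA,
                            BasicBlock *PredB, BasicBlock *TailBB) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  DTU.applyUpdatesPermissive({{DominatorTree::Delete, PredA, TailBB},
                              {DominatorTree::Delete, PredB, TailBB}});
  DTU.flush(); // optional: force the pending updates to be applied now
}
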
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index beac0d967a98..98243a23f1ef 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -1,9 +1,8 @@
//===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -112,11 +113,10 @@ public:
if (ConstHoistWithBlockFrequency)
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
- void releaseMemory() override { Impl.releaseMemory(); }
-
private:
ConstantHoistingPass Impl;
};
@@ -129,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
@@ -151,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
ConstHoistWithBlockFrequency
? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
: nullptr,
- Fn.getEntryBlock());
+ Fn.getEntryBlock(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
if (MadeChange) {
LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -211,6 +213,9 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
// in the dominator tree from Entry to 'BB'.
SmallPtrSet<BasicBlock *, 16> Candidates;
for (auto BB : BBs) {
+ // Ignore unreachable basic blocks.
+ if (!DT.isReachableFromEntry(BB))
+ continue;
Path.clear();
// Walk up the dominator tree until Entry or another BB in BBs
// is reached. Insert the nodes on the way to the Path.
@@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCandVecType::iterator &MaxCostItr) {
unsigned NumUses = 0;
- if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+ bool OptForSize = Entry->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+ if (!OptForSize || std::distance(S,E) > 100) {
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
NumUses += ConstCand->Uses.size();
if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
@@ -640,8 +647,8 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
// Sort the constants by value and type. This invalidates the mapping!
- std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
- [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
+ llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,
+ const ConstantCandidate &RHS) {
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
return LHS.ConstInt->getType()->getBitWidth() <
RHS.ConstInt->getType()->getBitWidth();
@@ -824,7 +831,9 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
for (auto const &ConstInfo : ConstInfoVec) {
SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
- assert(!IPSet.empty() && "IPSet is empty");
+ // We can have an empty set if the function contains unreachable blocks.
+ if (IPSet.empty())
+ continue;
unsigned UsesNum = 0;
unsigned ReBasesNum = 0;
@@ -917,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
/// Optimize expensive integer constants in the given function.
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
DominatorTree &DT, BlockFrequencyInfo *BFI,
- BasicBlock &Entry) {
+ BasicBlock &Entry, ProfileSummaryInfo *PSI) {
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
this->DL = &Fn.getParent()->getDataLayout();
this->Ctx = &Fn.getContext();
this->Entry = &Entry;
+ this->PSI = PSI;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -948,6 +958,8 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
// Cleanup dead instructions.
deleteDeadCastInst();
+ cleanup();
+
return MadeChange;
}
@@ -958,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
auto BFI = ConstHoistWithBlockFrequency
? &AM.getResult<BlockFrequencyAnalysis>(F)
: nullptr;
- if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
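
Under the new pass manager a function pass cannot compute a module analysis on demand; it may only read results the module-level manager has already cached, which is why run() goes through ModuleAnalysisManagerFunctionProxy and getCachedResult, and why PSI can legitimately be null. A minimal sketch of the same access pattern in a hypothetical pass (MyPass is illustrative; the proxy and analysis names are the ones used above):

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    auto &MAM =
        AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
    // Null if the module pipeline never ran ProfileSummaryAnalysis.
    ProfileSummaryInfo *PSI =
        MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
    (void)PSI; // a real pass would consult PSI before size-based decisions
    return PreservedAnalyses::all();
  }
};
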
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 51032b0625f8..770321c740a0 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -1,9 +1,8 @@
//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index d0105701c73f..89497177524f 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -1,9 +1,8 @@
//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -27,7 +27,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -64,8 +63,10 @@ STATISTIC(NumUDivs, "Number of udivs whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumOverflows, "Number of overflow checks removed");
+STATISTIC(NumSaturating,
+    "Number of saturating arithmetic operations converted to normal arithmetic");
-static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
+static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(false));
namespace {
@@ -307,11 +308,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
/// that cannot fire no matter what the incoming edge can safely be removed. If
/// a case fires on every incoming edge then the entire switch can be removed
/// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
DominatorTree *DT) {
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
- Value *Cond = SI->getCondition();
- BasicBlock *BB = SI->getParent();
+ Value *Cond = I->getCondition();
+ BasicBlock *BB = I->getParent();
// If the condition was defined in the same block as the switch then LazyValueInfo
// currently won't say anything useful about it, though in theory it could.
@@ -328,67 +329,72 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
for (auto *Succ : successors(BB))
SuccessorsCount[Succ]++;
- for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
- ConstantInt *Case = CI->getCaseValue();
-
- // Check to see if the switch condition is equal to/not equal to the case
- // value on every incoming edge, equal/not equal being the same each time.
- LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- // Is the switch condition equal to the case value?
- LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
- Cond, Case, *PI,
- BB, SI);
- // Give up on this case if nothing is known.
- if (Value == LazyValueInfo::Unknown) {
- State = LazyValueInfo::Unknown;
- break;
+ { // Scope for SwitchInstProfUpdateWrapper. It must not live during
+ // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
+ SwitchInstProfUpdateWrapper SI(*I);
+
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ ConstantInt *Case = CI->getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI,
+ BB, SI);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
}
- // If this was the first edge to be visited, record that all other edges
- // need to give the same result.
- if (PI == PB) {
- State = Value;
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ BasicBlock *Succ = CI->getCaseSuccessor();
+ Succ->removePredecessor(BB);
+ CI = SI.removeCase(CI);
+ CE = SI->case_end();
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
+ ++NumDeadCases;
+ Changed = true;
+ if (--SuccessorsCount[Succ] == 0)
+ DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
continue;
}
-
- // If this case is known to fire for some edges and known not to fire for
- // others then there is nothing we can do - give up.
- if (Value != State) {
- State = LazyValueInfo::Unknown;
+ if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
break;
}
- }
- if (State == LazyValueInfo::False) {
- // This case never fires - remove it.
- BasicBlock *Succ = CI->getCaseSuccessor();
- Succ->removePredecessor(BB);
- CI = SI->removeCase(CI);
- CE = SI->case_end();
-
- // The condition can be modified by removePredecessor's PHI simplification
- // logic.
- Cond = SI->getCondition();
-
- ++NumDeadCases;
- Changed = true;
- if (--SuccessorsCount[Succ] == 0)
- DTU.deleteEdge(BB, Succ);
- continue;
- }
- if (State == LazyValueInfo::True) {
- // This case always fires. Arrange for the switch to be turned into an
- // unconditional branch by replacing the switch condition with the case
- // value.
- SI->setCondition(Case);
- NumDeadCases += SI->getNumCases();
- Changed = true;
- break;
+ // Increment the case iterator since we didn't delete it.
+ ++CI;
}
-
- // Increment the case iterator since we didn't delete it.
- ++CI;
}
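
Scoping a SwitchInstProfUpdateWrapper around the case-removal loop keeps the switch's !prof branch_weights consistent as cases are deleted, and destroying it before ConstantFoldTerminator() runs avoids touching a SwitchInst that may be replaced. A minimal sketch of routing removal through the wrapper (caseIsProvablyDead is a hypothetical predicate standing in for the LVI reasoning above):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Remove every case the predicate proves dead, letting the wrapper rebalance
// the branch weights as cases disappear.
static void dropDeadCases(SwitchInst &SwInst,
                          bool (*caseIsProvablyDead)(const ConstantInt *)) {
  BasicBlock *Parent = SwInst.getParent();
  SwitchInstProfUpdateWrapper SI(SwInst);
  for (auto CI = SI->case_begin(); CI != SI->case_end();) {
    if (caseIsProvablyDead(CI->getCaseValue())) {
      CI->getCaseSuccessor()->removePredecessor(Parent);
      CI = SI.removeCase(CI); // also keeps the !prof weights consistent
    } else {
      ++CI;
    }
  }
}
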
if (Changed)
@@ -399,56 +405,48 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
return Changed;
}
-// See if we can prove that the given overflow intrinsic will not overflow.
-static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
- using OBO = OverflowingBinaryOperator;
- auto NoWrap = [&] (Instruction::BinaryOps BinOp, unsigned NoWrapKind) {
- Value *RHS = II->getOperand(1);
- ConstantRange RRange = LVI->getConstantRange(RHS, II->getParent(), II);
- ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
- BinOp, RRange, NoWrapKind);
- // As an optimization, do not compute LRange if we do not need it.
- if (NWRegion.isEmptySet())
- return false;
- Value *LHS = II->getOperand(0);
- ConstantRange LRange = LVI->getConstantRange(LHS, II->getParent(), II);
- return NWRegion.contains(LRange);
- };
- switch (II->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::uadd_with_overflow:
- return NoWrap(Instruction::Add, OBO::NoUnsignedWrap);
- case Intrinsic::sadd_with_overflow:
- return NoWrap(Instruction::Add, OBO::NoSignedWrap);
- case Intrinsic::usub_with_overflow:
- return NoWrap(Instruction::Sub, OBO::NoUnsignedWrap);
- case Intrinsic::ssub_with_overflow:
- return NoWrap(Instruction::Sub, OBO::NoSignedWrap);
- }
- return false;
+// See if we can prove that the given binary op intrinsic will not overflow.
+static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
+ ConstantRange LRange = LVI->getConstantRange(
+ BO->getLHS(), BO->getParent(), BO);
+ ConstantRange RRange = LVI->getConstantRange(
+ BO->getRHS(), BO->getParent(), BO);
+ ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+ BO->getBinaryOp(), RRange, BO->getNoWrapKind());
+ return NWRegion.contains(LRange);
}
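
The rewritten willNotOverflow() leans entirely on makeGuaranteedNoWrapRegion(): given the RHS range and the no-wrap kind, it returns the set of LHS values that can never wrap, and the overflow bit is provably clear whenever LVI's LHS range sits inside that region. A hand-rolled 8-bit illustration of that reasoning in plain C++ (deliberately not the ConstantRange API):

#include <cassert>
#include <cstdint>

int main() {
  // Suppose LVI proves RHS lies in [0, 10] for an unsigned 8-bit add. The
  // guaranteed no-wrap region for LHS is then [0, 255 - 10] = [0, 245]:
  const unsigned RHSMax = 10;
  const unsigned NoWrapMax = UINT8_MAX - RHSMax; // 245
  for (unsigned LHS = 0; LHS <= NoWrapMax; ++LHS)
    assert(LHS + RHSMax <= UINT8_MAX && "addition cannot wrap here");
  // So if LVI also proves LHS <= 245, uadd.with.overflow's overflow bit is
  // statically false and the intrinsic can become a plain add nuw.
  return 0;
}
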
-static void processOverflowIntrinsic(IntrinsicInst *II) {
- IRBuilder<> B(II);
- Value *NewOp = nullptr;
- switch (II->getIntrinsicID()) {
- default:
- llvm_unreachable("Unexpected instruction.");
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
- break;
+static void processOverflowIntrinsic(WithOverflowInst *WO) {
+ IRBuilder<> B(WO);
+ Value *NewOp = B.CreateBinOp(
+ WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
+ // Constant-folding could have happened.
+ if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
+ if (WO->isSigned())
+ Inst->setHasNoSignedWrap();
+ else
+ Inst->setHasNoUnsignedWrap();
}
+
+ Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0);
+ NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1);
+ WO->replaceAllUsesWith(NewI);
+ WO->eraseFromParent();
++NumOverflows;
- Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
- NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
- II->replaceAllUsesWith(NewI);
- II->eraseFromParent();
+}
+
+static void processSaturatingInst(SaturatingInst *SI) {
+ BinaryOperator *BinOp = BinaryOperator::Create(
+ SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ BinOp->setDebugLoc(SI->getDebugLoc());
+ if (SI->isSigned())
+ BinOp->setHasNoSignedWrap();
+ else
+ BinOp->setHasNoUnsignedWrap();
+
+ SI->replaceAllUsesWith(BinOp);
+ SI->eraseFromParent();
+ ++NumSaturating;
}
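
processSaturatingInst() relies on the same fact from the other direction: once overflow has been ruled out, a saturating add or subtract computes exactly the value of the plain operator, so the intrinsic can be replaced by a BinaryOperator carrying the matching nuw/nsw flag. A small self-contained check of that equivalence for unsigned 8-bit addition:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Reference semantics of llvm.uadd.sat on i8.
static uint8_t uadd_sat8(uint8_t A, uint8_t B) {
  return static_cast<uint8_t>(std::min(unsigned(A) + unsigned(B), 255u));
}

int main() {
  // If the operand ranges guarantee A + B <= 255, saturation never kicks in
  // and the saturating add agrees with the ordinary (nuw) add everywhere.
  for (unsigned A = 0; A <= 100; ++A)
    for (unsigned B = 0; B <= 100; ++B)
      assert(uadd_sat8(A, B) == static_cast<uint8_t>(A + B));
  return 0;
}
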
/// Infer nonnull attributes for the arguments at the specified callsite.
@@ -456,13 +454,44 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
- if (auto *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- if (willNotOverflow(II, LVI)) {
- processOverflowIntrinsic(II);
+ if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
+ if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
+ processOverflowIntrinsic(WO);
+ return true;
+ }
+ }
+
+ if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
+ if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+ processSaturatingInst(SI);
return true;
}
}
+ // Deopt bundle operands are intended to capture state with minimal
+ // perturbation of the code otherwise. If we can find a constant value for
+ // any such operand and remove a use of the original value, that's
+ // desirable since it may allow further optimization of that value (e.g. via
+ // single-use rules in instcombine). Since deopt uses tend, idiomatically, to
+ // appear along rare conditional paths, it is reasonably likely that we have
+ // a conditional fact with which LVI can fold.
+ if (auto DeoptBundle = CS.getOperandBundle(LLVMContext::OB_deopt)) {
+ bool Progress = false;
+ for (const Use &ConstU : DeoptBundle->Inputs) {
+ Use &U = const_cast<Use&>(ConstU);
+ Value *V = U.get();
+ if (V->getType()->isVectorTy()) continue;
+ if (isa<Constant>(V)) continue;
+
+ Constant *C = LVI->getConstant(V, CS.getParent(), CS.getInstruction());
+ if (!C) continue;
+ U.set(C);
+ Progress = true;
+ }
+ if (Progress)
+ return true;
+ }
+
for (Value *V : CS.args()) {
PointerType *Type = dyn_cast<PointerType>(V->getType());
// Try to mark pointer typed parameters as non-null. We skip the
@@ -512,7 +541,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
auto OrigWidth = Instr->getType()->getIntegerBitWidth();
- ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
+ ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
for (Value *Operand : Instr->operands()) {
OperandRange = OperandRange.unionWith(
LVI->getConstantRange(Operand, Instr->getParent()));
@@ -603,55 +632,42 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
return true;
}
-static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
+static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
- if (DontProcessAdds)
+ if (DontAddNoWrapFlags)
return false;
- if (AddOp->getType()->isVectorTy())
+ if (BinOp->getType()->isVectorTy())
return false;
- bool NSW = AddOp->hasNoSignedWrap();
- bool NUW = AddOp->hasNoUnsignedWrap();
+ bool NSW = BinOp->hasNoSignedWrap();
+ bool NUW = BinOp->hasNoUnsignedWrap();
if (NSW && NUW)
return false;
- BasicBlock *BB = AddOp->getParent();
+ BasicBlock *BB = BinOp->getParent();
- Value *LHS = AddOp->getOperand(0);
- Value *RHS = AddOp->getOperand(1);
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
- ConstantRange LRange = LVI->getConstantRange(LHS, BB, AddOp);
-
- // Initialize RRange only if we need it. If we know that guaranteed no wrap
- // range for the given LHS range is empty don't spend time calculating the
- // range for the RHS.
- Optional<ConstantRange> RRange;
- auto LazyRRange = [&] () {
- if (!RRange)
- RRange = LVI->getConstantRange(RHS, BB, AddOp);
- return RRange.getValue();
- };
+ ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
+ ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
bool Changed = false;
if (!NUW) {
ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinaryOperator::Add, LRange, OBO::NoUnsignedWrap);
- if (!NUWRange.isEmptySet()) {
- bool NewNUW = NUWRange.contains(LazyRRange());
- AddOp->setHasNoUnsignedWrap(NewNUW);
- Changed |= NewNUW;
- }
+ BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
+ bool NewNUW = NUWRange.contains(LRange);
+ BinOp->setHasNoUnsignedWrap(NewNUW);
+ Changed |= NewNUW;
}
if (!NSW) {
ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinaryOperator::Add, LRange, OBO::NoSignedWrap);
- if (!NSWRange.isEmptySet()) {
- bool NewNSW = NSWRange.contains(LazyRRange());
- AddOp->setHasNoSignedWrap(NewNSW);
- Changed |= NewNSW;
- }
+ BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
+ bool NewNSW = NSWRange.contains(LRange);
+ BinOp->setHasNoSignedWrap(NewNSW);
+ Changed |= NewNSW;
}
return Changed;
@@ -725,7 +741,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
break;
case Instruction::Add:
- BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
+ case Instruction::Sub:
+ BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
break;
}
}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 4c964e6e888c..479e0ed74074 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -1,9 +1,8 @@
//===- DCE.cpp - Code to perform dead code elimination --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 469930ca6a19..a81645745b48 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1,9 +1,8 @@
//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,8 +28,8 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -57,6 +56,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -98,9 +98,8 @@ using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering,
- SmallSetVector<Value *, 16> *ValueSet = nullptr) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -136,8 +135,8 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
}
if (ValueSet) ValueSet->remove(DeadInst);
- InstrOrdering->erase(DeadInst);
IOL.erase(DeadInst);
+ OBB.eraseInstruction(DeadInst);
if (NewIter == DeadInst->getIterator())
NewIter = DeadInst->eraseFromParent();
@@ -657,8 +656,7 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -692,7 +690,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB);
++NumFastStores;
MadeChange = true;
@@ -715,7 +713,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
/// the DeadStackObjects set. If so, they become live because the location is
/// being loaded.
static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
- SmallSetVector<Value *, 16> &DeadStackObjects,
+ SmallSetVector<const Value *, 16> &DeadStackObjects,
const DataLayout &DL, AliasAnalysis *AA,
const TargetLibraryInfo *TLI,
const Function *F) {
@@ -728,12 +726,12 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
- DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
+ DeadStackObjects.remove(UnderlyingPointer);
return;
}
// Remove objects that could alias LoadedLoc.
- DeadStackObjects.remove_if([&](Value *I) {
+ DeadStackObjects.remove_if([&](const Value *I) {
// See if the loaded location could alias the stack location.
MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F));
return !AA->isNoAlias(StackLoc, LoadedLoc);
@@ -747,15 +745,15 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
/// store i32 1, i32* %A
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
- MemoryDependenceResults *MD,
- const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ MemoryDependenceResults *MD,
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL,
+ OrderedBasicBlock &OBB) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
// function.
- SmallSetVector<Value*, 16> DeadStackObjects;
+ SmallSetVector<const Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock &Entry = BB.getParent()->front();
@@ -784,12 +782,12 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If we find a store, check to see if it points into a dead stack value.
if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
- SmallVector<Value *, 4> Pointers;
+ SmallVector<const Value *, 4> Pointers;
GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
- for (Value *Pointer : Pointers)
+ for (const Value *Pointer : Pointers)
if (!DeadStackObjects.count(Pointer)) {
AllDead = false;
break;
@@ -800,7 +798,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
- for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ for (SmallVectorImpl<const Value *>::iterator I =
+ Pointers.begin(),
E = Pointers.end();
I != E; ++I) {
dbgs() << **I;
@@ -810,7 +809,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
<< '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB,
+ &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -821,7 +821,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (isInstructionTriviallyDead(&*BBI, TLI)) {
LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
<< *&*BBI << '\n');
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB,
+ &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -847,7 +848,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If the call might load from any of our allocas, then any store above
// the call is live.
- DeadStackObjects.remove_if([&](Value *I) {
+ DeadStackObjects.remove_if([&](const Value *I) {
// See if the call site touches the value.
return isRefSet(AA->getModRefInfo(
Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
@@ -946,7 +947,9 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
Value *Indices[1] = {
ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+ NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
EarlierIntrinsic->setDest(NewDestGEP);
EarlierOffset = EarlierOffset + OffsetMoved;
}
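
The tryToShorten hunk above adjusts to the two-type form of GetElementPtrInst::CreateInBounds and now propagates the debug location, but the transform itself is unchanged: when a later store overwrites the front of an earlier memset or memcpy, the earlier write's destination is advanced by OffsetMoved and its length shrunk accordingly. A rough sketch of that arithmetic on a plain byte buffer follows; it is standalone C++ on a toy MemSetWrite struct, not the IR rewrite itself.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Toy model of an earlier memset whose first OffsetMoved bytes are known
    // to be overwritten by a later store.
    struct MemSetWrite {
      uint8_t *Dest;
      uint64_t Length;
      uint8_t Value;
    };

    // Advance the destination (the new inbounds GEP in the real pass) and
    // shrink the length so the earlier write no longer touches the dead prefix.
    void shortenFromFront(MemSetWrite &Earlier, uint64_t OffsetMoved) {
      assert(OffsetMoved <= Earlier.Length && "cannot shorten past the end");
      Earlier.Dest += OffsetMoved;
      Earlier.Length -= OffsetMoved;
    }

    int main() {
      uint8_t Buf[8] = {};
      MemSetWrite W{Buf, sizeof(Buf), 0xAA};
      shortenFromFront(W, 3); // a later store fully covers Buf[0..2]
      std::memset(W.Dest, W.Value, W.Length);
      assert(Buf[0] == 0 && Buf[3] == 0xAA && Buf[7] == 0xAA);
      return 0;
    }
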
@@ -1025,7 +1028,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ OrderedBasicBlock &OBB) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -1041,7 +1044,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
++NumRedundantStores;
return true;
}
@@ -1059,7 +1062,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
++NumRedundantStores;
return true;
}
@@ -1073,11 +1076,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
- // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock?
- // The current OrderedBasicBlock can't deal with mutation at the moment.
- size_t LastThrowingInstIndex = 0;
- DenseMap<Instruction*, size_t> InstrOrdering;
- size_t InstrIndex = 1;
+ OrderedBasicBlock OBB(&BB);
+ Instruction *LastThrowing = nullptr;
// A map of interval maps representing partially-overwritten value parts.
InstOverlapIntervalsTy IOL;
@@ -1086,7 +1086,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -1095,10 +1095,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *Inst = &*BBI++;
- size_t CurInstNumber = InstrIndex++;
- InstrOrdering.insert(std::make_pair(Inst, CurInstNumber));
if (Inst->mayThrow()) {
- LastThrowingInstIndex = CurInstNumber;
+ LastThrowing = Inst;
continue;
}
@@ -1107,13 +1105,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
MadeChange = true;
continue;
}
// If we find something that writes memory, get its memory dependence.
- MemDepResult InstDep = MD->getDependency(Inst);
+ MemDepResult InstDep = MD->getDependency(Inst, &OBB);
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
@@ -1158,9 +1156,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If the underlying object is a non-escaping memory allocation, any store
// to it is dead along the unwind edge. Otherwise, we need to preserve
// the store.
- size_t DepIndex = InstrOrdering.lookup(DepWrite);
- assert(DepIndex && "Unexpected instruction");
- if (DepIndex <= LastThrowingInstIndex) {
+ if (LastThrowing && OBB.dominates(DepWrite, LastThrowing)) {
const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
if (!IsStoreDeadOnUnwind) {
@@ -1191,12 +1187,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
<< "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
++NumFastStores;
MadeChange = true;
// We erased DepWrite; start over.
- InstDep = MD->getDependency(Inst);
+ InstDep = MD->getDependency(Inst, &OBB);
continue;
} else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
((OR == OW_Begin &&
@@ -1215,12 +1211,17 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Earlier->getValueOperand()->getType()) &&
Later && isa<ConstantInt>(Later->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Later->getValueOperand()->getType()) &&
memoryIsNotModifiedBetween(Earlier, Later, AA)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the later store is fully contained in the earlier one and
// c) they both have a constant value
+          // d) neither of the two stores needs padding
// Merge the two stores, replacing the earlier store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
@@ -1264,14 +1265,11 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
++NumModifiedStores;
// Remove earlier, wider, store
- size_t Idx = InstrOrdering.lookup(DepWrite);
- InstrOrdering.erase(DepWrite);
- InstrOrdering.insert(std::make_pair(SI, Idx));
+ OBB.replaceInstruction(DepWrite, SI);
// Delete the old stores and now-dead instructions that feed them.
- deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
- &InstrOrdering);
+ deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
MadeChange = true;
// We erased DepWrite and Inst (Loc); start over.
@@ -1306,7 +1304,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB);
return MadeChange;
}
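
The DeadStoreElimination changes above drop the hand-rolled DenseMap<Instruction*, size_t> numbering in favour of OrderedBasicBlock, which caches instruction positions to answer intra-block ordering queries and lets a merged store take over the slot of the store it replaces. The sketch below is a rough standalone analogue only: it numbers eagerly, whereas LLVM's class numbers lazily, and the OrderedBlock name and members are illustrative.

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Toy stand-in for an instruction; only its identity matters here.
    struct Instr { std::string Name; };

    // Caches the position of each instruction in a block so "does A come
    // before B" queries are O(1) after the initial numbering.
    class OrderedBlock {
      std::unordered_map<const Instr *, unsigned> Pos;

    public:
      explicit OrderedBlock(const std::vector<const Instr *> &Block) {
        unsigned N = 0;
        for (const Instr *I : Block)
          Pos[I] = N++;
      }

      // True if A is numbered strictly before B within the block.
      bool dominates(const Instr *A, const Instr *B) const {
        return Pos.at(A) < Pos.at(B);
      }

      // Give New the slot of Old, e.g. when a merged store replaces an
      // earlier, wider store.
      void replaceInstruction(const Instr *Old, const Instr *New) {
        auto It = Pos.find(Old);
        assert(It != Pos.end() && "replacing an unknown instruction");
        unsigned Idx = It->second;
        Pos.erase(It);
        Pos[New] = Idx;
      }
    };

    int main() {
      Instr A{"store1"}, B{"call"}, C{"store2"}, Merged{"merged.store"};
      OrderedBlock OBB({&A, &B, &C});
      assert(OBB.dominates(&A, &C));
      OBB.replaceInstruction(&A, &Merged); // merged store keeps store1's slot
      assert(OBB.dominates(&Merged, &B));
      return 0;
    }
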
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index ffcf34f1cf7a..876681b4f9de 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -1,9 +1,8 @@
//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 1f09979b3382..f1f075257020 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1,9 +1,8 @@
//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,6 +75,16 @@ STATISTIC(NumDSE, "Number of trivial dead stores removed");
DEBUG_COUNTER(CSECounter, "early-cse",
"Controls which instructions are removed");
+static cl::opt<unsigned> EarlyCSEMssaOptCap(
+ "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
+ cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
+ "for faster compile. Caps the MemorySSA clobbering calls."));
+
+static cl::opt<bool> EarlyCSEDebugHash(
+ "earlycse-debug-hash", cl::init(false), cl::Hidden,
+ cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
+ "function is well-behaved w.r.t. its isEqual predicate"));
+
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//
@@ -126,7 +135,33 @@ template <> struct DenseMapInfo<SimpleValue> {
} // end namespace llvm
-unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+/// Match a 'select' including an optional 'not' of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+ Value *&B,
+ SelectPatternFlavor &Flavor) {
+ // Return false if V is not even a select.
+ if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+ return false;
+
+ // Look through a 'not' of the condition operand by swapping A/B.
+ Value *CondNot;
+ if (match(Cond, m_Not(m_Value(CondNot)))) {
+ Cond = CondNot;
+ std::swap(A, B);
+ }
+
+ // Set flavor if we find a match, or set it to unknown otherwise; in
+ // either case, return true to indicate that this is a select we can
+ // process.
+ if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+ Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+ else
+ Flavor = SPF_UNKNOWN;
+
+ return true;
+}
+
+static unsigned getHashValueImpl(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
@@ -139,32 +174,56 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
}
if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+ // Compares can be commuted by swapping the comparands and
+ // updating the predicate. Choose the form that has the
+ // comparands in sorted order, or in the case of a tie, the
+ // one with the lower predicate.
Value *LHS = CI->getOperand(0);
Value *RHS = CI->getOperand(1);
CmpInst::Predicate Pred = CI->getPredicate();
- if (Inst->getOperand(0) > Inst->getOperand(1)) {
+ CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+ if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
std::swap(LHS, RHS);
- Pred = CI->getSwappedPredicate();
+ Pred = SwappedPred;
}
return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
}
- // Hash min/max/abs (cmp + select) to allow for commuted operands.
- // Min/max may also have non-canonical compare predicate (eg, the compare for
- // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
- // compare.
- Value *A, *B;
- SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
- // TODO: We should also detect FP min/max.
- if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
- SPF == SPF_UMIN || SPF == SPF_UMAX) {
- if (A > B)
+ // Hash general selects to allow matching commuted true/false operands.
+ SelectPatternFlavor SPF;
+ Value *Cond, *A, *B;
+ if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+ // Hash min/max/abs (cmp + select) to allow for commuted operands.
+ // Min/max may also have non-canonical compare predicate (eg, the compare for
+ // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
+ // compare.
+ // TODO: We should also detect FP min/max.
+ if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
+ SPF == SPF_UMIN || SPF == SPF_UMAX) {
+ if (A > B)
+ std::swap(A, B);
+ return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+ if (SPF == SPF_ABS || SPF == SPF_NABS) {
+ // ABS/NABS always puts the input in A and its negation in B.
+ return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+
+ // Hash general selects to allow matching commuted true/false operands.
+
+ // If we do not have a compare as the condition, just hash in the condition.
+ CmpInst::Predicate Pred;
+ Value *X, *Y;
+ if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
+ return hash_combine(Inst->getOpcode(), Cond, A, B);
+
+ // Similar to cmp normalization (above) - canonicalize the predicate value:
+ // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
+ if (CmpInst::getInversePredicate(Pred) < Pred) {
+ Pred = CmpInst::getInversePredicate(Pred);
std::swap(A, B);
- return hash_combine(Inst->getOpcode(), SPF, A, B);
- }
- if (SPF == SPF_ABS || SPF == SPF_NABS) {
- // ABS/NABS always puts the input in A and its negation in B.
- return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+ return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
}
if (CastInst *CI = dyn_cast<CastInst>(Inst))
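
The compare hashing above chooses a canonical orientation by comparing (LHS, Pred) against (RHS, SwappedPred) with std::tie, so a compare and its commuted form (for example icmp sgt a, b and icmp slt b, a) hash identically. Here is a reduced standalone illustration; integers stand in for Value pointers, the two-value Pred enum is a stand-in for CmpInst predicates, and a trivial mix function replaces llvm::hash_combine.

    #include <cassert>
    #include <cstdint>
    #include <tuple>
    #include <utility>

    // Toy predicate encoding: the swapped predicate of sgt is slt and back.
    enum Pred : int { SLT = 0, SGT = 1 };
    Pred swapped(Pred P) { return P == SLT ? SGT : SLT; }

    // Trivial stand-in for llvm::hash_combine.
    uint64_t mix(uint64_t H, uint64_t V) { return H * 1099511628211ull + V; }

    // Hash a compare in a canonical orientation so commuted forms collide.
    uint64_t hashCmp(uintptr_t LHS, uintptr_t RHS, Pred P) {
      Pred SwappedP = swapped(P);
      if (std::tie(LHS, P) > std::tie(RHS, SwappedP)) {
        std::swap(LHS, RHS);
        P = SwappedP;
      }
      return mix(mix(mix(0xcbf29ce484222325ull, P), LHS), RHS);
    }

    int main() {
      uintptr_t A = 0x1000, B = 0x2000; // pretend Value* addresses
      assert(hashCmp(A, B, SGT) == hashCmp(B, A, SLT));
      return 0;
    }
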
@@ -179,8 +238,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
IVI->getOperand(1),
hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
- assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
- isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+ assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
isa<ShuffleVectorInst>(Inst)) &&
"Invalid/unknown instruction");
@@ -191,7 +249,19 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
-bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+#ifndef NDEBUG
+ // If -earlycse-debug-hash was specified, return a constant -- this
+ // will force all hashing to collide, so we'll exhaustively search
+ // the table for a match, and the assertion in isEqual will fire if
+ // there's a bug causing equal keys to hash differently.
+ if (EarlyCSEDebugHash)
+ return 0;
+#endif
+ return getHashValueImpl(Val);
+}
+
+static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
if (LHS.isSentinel() || RHS.isSentinel())
@@ -227,26 +297,68 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
// Min/max/abs can occur with commuted operands, non-canonical predicates,
// and/or non-canonical operands.
- Value *LHSA, *LHSB;
- SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
- // TODO: We should also detect FP min/max.
- if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
- LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
- LSPF == SPF_ABS || LSPF == SPF_NABS) {
- Value *RHSA, *RHSB;
- SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
+ // Selects can be non-trivially equivalent via inverted conditions and swaps.
+ SelectPatternFlavor LSPF, RSPF;
+ Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
+ if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
+ matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
if (LSPF == RSPF) {
- // Abs results are placed in a defined order by matchSelectPattern.
- if (LSPF == SPF_ABS || LSPF == SPF_NABS)
+ // TODO: We should also detect FP min/max.
+ if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
+ LSPF == SPF_UMIN || LSPF == SPF_UMAX)
+ return ((LHSA == RHSA && LHSB == RHSB) ||
+ (LHSA == RHSB && LHSB == RHSA));
+
+ if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
+ // Abs results are placed in a defined order by matchSelectPattern.
return LHSA == RHSA && LHSB == RHSB;
- return ((LHSA == RHSA && LHSB == RHSB) ||
- (LHSA == RHSB && LHSB == RHSA));
+ }
+
+ // select Cond, A, B <--> select not(Cond), B, A
+ if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
+ return true;
+ }
+
+ // If the true/false operands are swapped and the conditions are compares
+ // with inverted predicates, the selects are equal:
+ // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
+ //
+ // This also handles patterns with a double-negation in the sense of not +
+ // inverse, because we looked through a 'not' in the matching function and
+ // swapped A/B:
+ // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
+ //
+ // This intentionally does NOT handle patterns with a double-negation in
+ // the sense of not + not, because doing so could result in values
+    // comparing as equal that hash differently in the min/max/abs cases like:
+ // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
+ // ^ hashes as min ^ would not hash as min
+ // In the context of the EarlyCSE pass, however, such cases never reach
+ // this code, as we simplify the double-negation before hashing the second
+ // select (and so still succeed at CSEing them).
+ if (LHSA == RHSB && LHSB == RHSA) {
+ CmpInst::Predicate PredL, PredR;
+ Value *X, *Y;
+ if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
+ match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
+ CmpInst::getInversePredicate(PredL) == PredR)
+ return true;
}
}
return false;
}
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ // These comparisons are nontrivial, so assert that equality implies
+ // hash equality (DenseMap demands this as an invariant).
+ bool Result = isEqualImpl(LHS, RHS);
+ assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
+ getHashValueImpl(LHS) == getHashValueImpl(RHS));
+ return Result;
+}
+
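
The getHashValue/isEqual split above exists to check the DenseMap invariant that equal keys must hash equally: -earlycse-debug-hash collapses every hash to a constant so lookups degenerate into exhaustive isEqual probes, and isEqual asserts that any pair it reports equal also agrees under getHashValueImpl. The sketch below shows the same pattern with std::unordered_set instead of DenseMap and a toy key whose equality ignores sign; the DEBUG_HASH environment variable is just a stand-in for the command-line flag.

    #include <cassert>
    #include <cstdlib>
    #include <unordered_set>

    // Toy key: equality ignores the sign of V, so the hash must as well.
    struct Key { int V; };

    // Stand-in for the -earlycse-debug-hash flag.
    static const bool DebugHash = std::getenv("DEBUG_HASH") != nullptr;

    std::size_t getHashValueImpl(const Key &K) { return std::abs(K.V); }

    struct KeyHash {
      std::size_t operator()(const Key &K) const {
        // Forcing all keys into one bucket makes every lookup compare against
        // every stored key, so a hash/equality mismatch cannot hide.
        return DebugHash ? 0 : getHashValueImpl(K);
      }
    };

    struct KeyEq {
      bool operator()(const Key &A, const Key &B) const {
        bool Result = std::abs(A.V) == std::abs(B.V);
        // The invariant the new assertion checks: equal keys hash equally.
        assert(!Result || getHashValueImpl(A) == getHashValueImpl(B));
        return Result;
      }
    };

    int main() {
      std::unordered_set<Key, KeyHash, KeyEq> Set;
      Set.insert(Key{-3});
      assert(Set.count(Key{3}) == 1); // found because the hash canonicalizes
      return 0;
    }
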
//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//
@@ -419,6 +531,7 @@ public:
bool run();
private:
+ unsigned ClobberCounter = 0;
// Almost a POD, but needs to call the constructors for the scoped hash
// tables so that a new scope gets pushed on. These are RAII so that the
// scope gets popped when the NodeScope is destroyed.
@@ -608,36 +721,11 @@ private:
MSSA->verifyMemorySSA();
// Removing a store here can leave MemorySSA in an unoptimized state by
// creating MemoryPhis that have identical arguments and by creating
- // MemoryUses whose defining access is not an actual clobber. We handle the
- // phi case eagerly here. The non-optimized MemoryUse case is lazily
- // updated by MemorySSA getClobberingMemoryAccess.
- if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
- // Optimize MemoryPhi nodes that may become redundant by having all the
- // same input values once MA is removed.
- SmallSetVector<MemoryPhi *, 4> PhisToCheck;
- SmallVector<MemoryAccess *, 8> WorkQueue;
- WorkQueue.push_back(MA);
- // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
- // we shouldn't be processing that many phis and this will avoid an
- // allocation in almost all cases.
- for (unsigned I = 0; I < WorkQueue.size(); ++I) {
- MemoryAccess *WI = WorkQueue[I];
-
- for (auto *U : WI->users())
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
- PhisToCheck.insert(MP);
-
- MSSAUpdater->removeMemoryAccess(WI);
-
- for (MemoryPhi *MP : PhisToCheck) {
- MemoryAccess *FirstIn = MP->getIncomingValue(0);
- if (llvm::all_of(MP->incoming_values(),
- [=](Use &In) { return In == FirstIn; }))
- WorkQueue.push_back(MP);
- }
- PhisToCheck.clear();
- }
- }
+ // MemoryUses whose defining access is not an actual clobber. The phi case
+ // is handled by MemorySSA when passing OptimizePhis = true to
+ // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
+ // by MemorySSA's getClobberingMemoryAccess.
+ MSSAUpdater->removeMemoryAccess(Inst, true);
}
};
@@ -688,8 +776,13 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
// LaterInst, if LaterDef dominates EarlierInst then it can't occur between
// EarlierInst and LaterInst and neither can any other write that potentially
// clobbers LaterInst.
- MemoryAccess *LaterDef =
- MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+ MemoryAccess *LaterDef;
+ if (ClobberCounter < EarlyCSEMssaOptCap) {
+ LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+ ClobberCounter++;
+ } else
+ LaterDef = LaterMA->getDefiningAccess();
+
return MSSA->dominates(LaterDef, EarlierMA);
}
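
The hunk above caps how many precise clobber queries EarlyCSE makes against the MemorySSA walker; once ClobberCounter reaches EarlyCSEMssaOptCap it falls back to the cached defining access, trading precision for compile time. A standalone sketch of that capping pattern follows; the precise and cheap query functions are stand-ins rather than real MemorySSA calls.

    #include <cstdio>

    // Stand-ins for the two ways of answering the query: a costly precise
    // walk and a cheap, possibly conservative cached answer.
    int preciseClobberQuery(int Access) { return Access / 2; }
    int cachedDefiningAccess(int Access) { return Access; }

    class CappedClobberQuery {
      unsigned ClobberCounter = 0;
      const unsigned Cap;

    public:
      explicit CappedClobberQuery(unsigned Cap) : Cap(Cap) {}

      int getClobber(int Access) {
        if (ClobberCounter < Cap) {
          ++ClobberCounter;
          return preciseClobberQuery(Access); // precise while under the cap
        }
        return cachedDefiningAccess(Access);  // imprecise but cheap afterwards
      }
    };

    int main() {
      CappedClobberQuery Q(/*Cap=*/2);
      for (int I = 0; I < 4; ++I)
        std::printf("query %d -> %d\n", I, Q.getClobber(8));
      return 0;
    }
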
@@ -1117,7 +1210,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// At the moment, we don't remove ordered stores, but do remove
// unordered atomic stores. There's no special requirement (for
// unordered atomics) about removing atomic stores only in favor of
- // other atomic stores since we we're going to execute the non-atomic
+ // other atomic stores since we were going to execute the non-atomic
// one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
@@ -1184,8 +1277,7 @@ bool EarlyCSE::run() {
CurrentGeneration, DT.getRootNode(),
DT.getRootNode()->begin(), DT.getRootNode()->end()));
- // Save the current generation.
- unsigned LiveOutGeneration = CurrentGeneration;
+ assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
// Process the stack.
while (!nodesToProcess.empty()) {
@@ -1217,9 +1309,6 @@ bool EarlyCSE::run() {
}
} // while (!nodes...)
- // Reset the current generation.
- CurrentGeneration = LiveOutGeneration;
-
return Changed;
}
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 117b19fb8a42..31670b1464e4 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -1,9 +1,8 @@
//===- FlattenCFGPass.cpp - CFG Flatten Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index f2828e80bc58..4f83e869b303 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -1,9 +1,8 @@
//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,10 +147,10 @@ void Float2IntPass::seen(Instruction *I, ConstantRange R) {
// Helper - get a range representing a poison value.
ConstantRange Float2IntPass::badRange() {
- return ConstantRange(MaxIntegerBW + 1, true);
+ return ConstantRange::getFull(MaxIntegerBW + 1);
}
ConstantRange Float2IntPass::unknownRange() {
- return ConstantRange(MaxIntegerBW + 1, false);
+ return ConstantRange::getEmpty(MaxIntegerBW + 1);
}
ConstantRange Float2IntPass::validateRange(ConstantRange R) {
if (R.getBitWidth() > MaxIntegerBW + 1)
@@ -195,12 +194,13 @@ void Float2IntPass::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Path terminated cleanly - use the type of the integer input to seed
// the analysis.
unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
- auto Input = ConstantRange(BW, true);
+ auto Input = ConstantRange::getFull(BW);
auto CastOp = (Instruction::CastOps)I->getOpcode();
seen(I, validateRange(Input.castOp(CastOp, MaxIntegerBW+1)));
continue;
}
+ case Instruction::FNeg:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
@@ -241,6 +241,15 @@ void Float2IntPass::walkForwards() {
case Instruction::SIToFP:
llvm_unreachable("Should have been handled in walkForwards!");
+ case Instruction::FNeg:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FNeg is a unary operator!");
+ unsigned Size = Ops[0].getBitWidth();
+ auto Zero = ConstantRange(APInt::getNullValue(Size));
+ return Zero.sub(Ops[0]);
+ };
+ break;
+
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
@@ -427,7 +436,7 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
} else if (Instruction *VI = dyn_cast<Instruction>(V)) {
NewOperands.push_back(convert(VI, ToTy));
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false);
bool Exact;
CF->getValueAPF().convertToInteger(Val,
APFloat::rmNearestTiesToEven,
@@ -467,6 +476,10 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
break;
+ case Instruction::FNeg:
+ NewV = IRB.CreateNeg(NewOperands[0], I->getName());
+ break;
+
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
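
The Float2Int changes above teach the pass about FNeg: the forward walk negates the operand's range as 0 - Ops[0], and conversion then emits an integer negation. A small sketch of the interval arithmetic involved, using a plain [Lo, Hi] pair instead of ConstantRange and ignoring the wrap handling ConstantRange performs for real:

    #include <cassert>
    #include <cstdint>

    // Closed signed interval [Lo, Hi]; a crude stand-in for ConstantRange.
    struct Range { int64_t Lo, Hi; };

    // 0 - [Lo, Hi] = [-Hi, -Lo]; this is what Zero.sub(Ops[0]) computes for
    // the FNeg operand (wrap cases are ignored in this toy version).
    Range negate(Range R) { return Range{-R.Hi, -R.Lo}; }

    int main() {
      Range R{-3, 10}; // the operand is known to lie in [-3, 10]
      Range N = negate(R);
      assert(N.Lo == -10 && N.Hi == 3);
      return 0;
    }
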
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9861948c8297..1a02e9d33f49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1,9 +1,8 @@
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -46,8 +46,8 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -330,36 +330,15 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
e.type = EI->getType();
e.opcode = 0;
- IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
- // EI might be an extract from one of our recognised intrinsics. If it
- // is we'll synthesize a semantically equivalent expression instead on
- // an extract value expression.
- switch (I->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- e.opcode = Instruction::Add;
- break;
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- e.opcode = Instruction::Sub;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- e.opcode = Instruction::Mul;
- break;
- default:
- break;
- }
-
- if (e.opcode != 0) {
- // Intrinsic recognized. Grab its args to finish building the expression.
- assert(I->getNumArgOperands() == 2 &&
- "Expect two args for recognised intrinsics.");
- e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
- e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
- return e;
- }
+ WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+ if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+ // EI is an extract from one of our with.overflow intrinsics. Synthesize
+ // a semantically equivalent expression instead of an extract value
+ // expression.
+ e.opcode = WO->getBinaryOp();
+ e.varargs.push_back(lookupOrAdd(WO->getLHS()));
+ e.varargs.push_back(lookupOrAdd(WO->getRHS()));
+ return e;
}
// Not a recognised intrinsic. Fall back to producing an extract value
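
The createExtractvalueExpr change above value-numbers extractvalue (X, 0) of an add/sub/mul with.overflow intrinsic as the corresponding plain binary operation, so it shares a number with an ordinary add/sub/mul of the same operands and the redundancy becomes visible to GVN. A reduced sketch of that idea; the ValueTable here is a toy map keyed on (opcode, operand numbers), not GVN's Expression machinery.

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <tuple>

    enum Opcode { Add = 1, Sub = 2, Mul = 3 };
    using Key = std::tuple<Opcode, uint32_t, uint32_t>; // (opcode, VN(lhs), VN(rhs))

    class ValueTable {
      std::map<Key, uint32_t> Numbering;
      uint32_t NextNum = 1;

    public:
      // Return the existing number for this expression or assign a fresh one.
      uint32_t lookupOrAdd(Opcode Op, uint32_t L, uint32_t R) {
        auto Res = Numbering.emplace(Key{Op, L, R}, NextNum);
        if (Res.second)
          ++NextNum;
        return Res.first->second;
      }
    };

    int main() {
      ValueTable VN;
      // %sum = add i32 %a, %b
      uint32_t PlainAdd = VN.lookupOrAdd(Add, /*VN(%a)=*/10, /*VN(%b)=*/11);
      // %wo = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
      // %v  = extractvalue { i32, i1 } %wo, 0   ; numbered as an add of a, b
      uint32_t ExtractedAdd = VN.lookupOrAdd(Add, 10, 11);
      assert(PlainAdd == ExtractedAdd);
      return 0;
    }
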
@@ -513,6 +492,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
switch (I->getOpcode()) {
case Instruction::Call:
return lookupOrAddCall(cast<CallInst>(I));
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -544,6 +524,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast:
case Instruction::BitCast:
case Instruction::Select:
case Instruction::ExtractElement:
@@ -879,11 +860,12 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
const DataLayout &DL = LI->getModule()->getDataLayout();
+ Instruction *DepInst = DepInfo.getInst();
if (DepInfo.isClobber()) {
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
- if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
// Can't forward from non-atomic to atomic without violating memory model.
if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
@@ -899,7 +881,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// load i32* P
// load i8* (P+1)
// if we have this, replace the later with an extraction from the former.
- if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
// Can't forward from non-atomic to atomic without violating memory model.
@@ -916,7 +898,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
- if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
if (Address && !LI->isAtomic()) {
int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
@@ -930,8 +912,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load "; LI->printAsOperand(dbgs());
- Instruction *I = DepInfo.getInst();
- dbgs() << " is clobbered by " << *I << '\n';);
+ dbgs() << " is clobbered by " << *DepInst << '\n';);
if (ORE->allowExtraAnalysis(DEBUG_TYPE))
reportMayClobberedLoad(LI, DepInfo, DT, ORE);
@@ -939,8 +920,6 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
}
assert(DepInfo.isDef() && "follows from above");
- Instruction *DepInst = DepInfo.getInst();
-
// Loading the allocation -> undef.
if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
// Loading immediately after lifetime begin -> undef.
@@ -959,9 +938,8 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Reject loads and stores that are to the same address but are of
// different types if we have to. If the stored value is larger or equal to
// the loaded value, we can reuse it.
- if (S->getValueOperand()->getType() != LI->getType() &&
- !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), DL))
+ if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
+ DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -976,8 +954,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the types mismatch and we can't handle it, reject reuse of the load.
// If the stored value is larger or equal to the loaded value, we can reuse
// it.
- if (LD->getType() != LI->getType() &&
- !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+ if (!canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1132,6 +1109,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
+ // FIXME: Can we support the fallthrough edge?
+ if (isa<CallBrInst>(Pred->getTerminator())) {
+ LLVM_DEBUG(
+ dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
if (LoadBB->isEHPad()) {
LLVM_DEBUG(
dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
@@ -1220,9 +1205,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Instructions that have been inserted in predecessor(s) to materialize
// the load address do not retain their original debug locations. Doing
// so could lead to confusing (but correct) source attributions.
- // FIXME: How do we retain source locations without causing poor debugging
- // behavior?
- I->setDebugLoc(DebugLoc());
+ if (const DebugLoc &DL = I->getDebugLoc())
+ I->setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
@@ -1235,10 +1219,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
BasicBlock *UnavailablePred = PredLoad.first;
Value *LoadPtr = PredLoad.second;
- auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
- LI->isVolatile(), LI->getAlignment(),
- LI->getOrdering(), LI->getSyncScopeID(),
- UnavailablePred->getTerminator());
+ auto *NewLoad =
+ new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre",
+ LI->isVolatile(), LI->getAlignment(), LI->getOrdering(),
+ LI->getSyncScopeID(), UnavailablePred->getTerminator());
NewLoad->setDebugLoc(LI->getDebugLoc());
// Transfer the old load's AA tags to the new load.
@@ -2168,8 +2152,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
return false;
// We don't currently value number ANY inline asm calls.
- if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
- if (CallI->isInlineAsm())
+ if (auto *CallB = dyn_cast<CallBase>(CurInst))
+ if (CallB->isInlineAsm())
return false;
uint32_t ValNo = VN.lookup(CurInst);
@@ -2252,6 +2236,11 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<IndirectBrInst>(PREPred->getTerminator()))
return false;
+ // Don't do PRE across callbr.
+ // FIXME: Can we do this across the fallthrough edge?
+ if (isa<CallBrInst>(PREPred->getTerminator()))
+ return false;
+
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
@@ -2479,8 +2468,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
PHINode &Phi = cast<PHINode>(*II);
- Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
- UndefValue::get(Phi.getType()));
+ Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType()));
if (MD)
MD->invalidateCachedPointerInfo(&Phi);
}
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 76a42d7fe750..7614599653c4 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -1,9 +1,8 @@
//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -703,7 +702,7 @@ private:
// Vector of PHIs contains PHIs for different instructions.
// Sort the args according to their VNs, such that identical
// instructions are together.
- std::stable_sort(CHIs.begin(), CHIs.end(), cmpVN);
+ llvm::stable_sort(CHIs, cmpVN);
auto TI = BB->getTerminator();
auto B = CHIs.begin();
// [PreIt, PHIIt) form a range of CHIs which have identical VNs.
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 1df5f5400c14..054025755c69 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -1,9 +1,8 @@
//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -442,6 +441,7 @@ public:
break;
case Instruction::Call:
case Instruction::Invoke:
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -714,6 +714,15 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
// FIXME: If any of these fail, we should partition up the candidates to
// try and continue making progress.
Instruction *I0 = NewInsts[0];
+
+ // If all instructions that are going to participate don't have the same
+ // number of operands, we can't do any useful PHI analysis for all operands.
+ auto hasDifferentNumOperands = [&I0](Instruction *I) {
+ return I->getNumOperands() != I0->getNumOperands();
+ };
+ if (any_of(NewInsts, hasDifferentNumOperands))
+ return None;
+
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
if (PHI.areAllIncomingValuesSame())
@@ -791,10 +800,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
--LRI;
}
- std::stable_sort(
- Candidates.begin(), Candidates.end(),
- [](const SinkingInstructionCandidate &A,
- const SinkingInstructionCandidate &B) { return A > B; });
+ llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
: Candidates) dbgs()
<< " " << C << "\n";);
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index efc204d4f74b..e14f44bb7069 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -1,9 +1,8 @@
//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,11 @@ static cl::opt<unsigned> FrequentBranchThreshold(
"it is considered frequently taken"),
cl::init(1000));
+static cl::opt<bool>
+ WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
+ cl::desc("Whether or not we should widen guards "
+ "expressed as branches by widenable conditions"),
+ cl::init(true));
namespace {
@@ -93,6 +97,10 @@ static Value *getCondition(Instruction *I) {
"Bad guard intrinsic?");
return GI->getArgOperand(0);
}
+ if (isGuardAsWidenableBranch(I)) {
+ auto *Cond = cast<BranchInst>(I)->getCondition();
+ return cast<BinaryOperator>(Cond)->getOperand(0);
+ }
return cast<BranchInst>(I)->getCondition();
}
@@ -133,12 +141,12 @@ class GuardWideningImpl {
/// guards.
DenseSet<Instruction *> WidenedGuards;
- /// Try to eliminate guard \p Guard by widening it into an earlier dominating
- /// guard. \p DFSI is the DFS iterator on the dominator tree that is
- /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
+ /// Try to eliminate instruction \p Instr by widening it into an earlier
+ /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
+  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
/// maps BasicBlocks to the set of guards seen in that block.
- bool eliminateGuardViaWidening(
- Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+ bool eliminateInstrViaWidening(
+ Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsPerBlock, bool InvertCondition = false);
@@ -162,28 +170,25 @@ class GuardWideningImpl {
static StringRef scoreTypeToString(WideningScore WS);
- /// Compute the score for widening the condition in \p DominatedGuard
- /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
- /// \p DominatingGuardLoop). If \p InvertCond is set, then we widen the
+ /// Compute the score for widening the condition in \p DominatedInstr
+ /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
/// inverted condition of the dominating guard.
- WideningScore computeWideningScore(Instruction *DominatedGuard,
- Loop *DominatedGuardLoop,
+ WideningScore computeWideningScore(Instruction *DominatedInstr,
Instruction *DominatingGuard,
- Loop *DominatingGuardLoop,
bool InvertCond);
/// Helper to check if \p V can be hoisted to \p InsertPos.
- bool isAvailableAt(Value *V, Instruction *InsertPos) {
- SmallPtrSet<Instruction *, 8> Visited;
+ bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
+ SmallPtrSet<const Instruction *, 8> Visited;
return isAvailableAt(V, InsertPos, Visited);
}
- bool isAvailableAt(Value *V, Instruction *InsertPos,
- SmallPtrSetImpl<Instruction *> &Visited);
+ bool isAvailableAt(const Value *V, const Instruction *InsertPos,
+ SmallPtrSetImpl<const Instruction *> &Visited) const;
/// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
/// isAvailableAt returned true.
- void makeAvailableAt(Value *V, Instruction *InsertPos);
+ void makeAvailableAt(Value *V, Instruction *InsertPos) const;
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
/// to generate an expression computing the logical AND of \p Cond0 and (\p
@@ -200,23 +205,23 @@ class GuardWideningImpl {
/// pre-existing instruction in the IR that computes the result of this range
/// check.
class RangeCheck {
- Value *Base;
- ConstantInt *Offset;
- Value *Length;
+ const Value *Base;
+ const ConstantInt *Offset;
+ const Value *Length;
ICmpInst *CheckInst;
public:
- explicit RangeCheck(Value *Base, ConstantInt *Offset, Value *Length,
- ICmpInst *CheckInst)
+ explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
+ const Value *Length, ICmpInst *CheckInst)
: Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}
- void setBase(Value *NewBase) { Base = NewBase; }
- void setOffset(ConstantInt *NewOffset) { Offset = NewOffset; }
+ void setBase(const Value *NewBase) { Base = NewBase; }
+ void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }
- Value *getBase() const { return Base; }
- ConstantInt *getOffset() const { return Offset; }
+ const Value *getBase() const { return Base; }
+ const ConstantInt *getOffset() const { return Offset; }
const APInt &getOffsetValue() const { return getOffset()->getValue(); }
- Value *getLength() const { return Length; };
+ const Value *getLength() const { return Length; };
ICmpInst *getCheckInst() const { return CheckInst; }
void print(raw_ostream &OS, bool PrintTypes = false) {
@@ -238,19 +243,19 @@ class GuardWideningImpl {
/// append them to \p Checks. Returns true on success, may clobber \c Checks
/// on failure.
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
- SmallPtrSet<Value *, 8> Visited;
+ SmallPtrSet<const Value *, 8> Visited;
return parseRangeChecks(CheckCond, Checks, Visited);
}
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
- SmallPtrSetImpl<Value *> &Visited);
+ SmallPtrSetImpl<const Value *> &Visited);
/// Combine the checks in \p Checks into a smaller set of checks and append
/// them into \p CombinedChecks. Return true on success (i.e. all of checks
/// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
/// and \p CombinedChecks on success and on failure.
bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
- SmallVectorImpl<RangeCheck> &CombinedChecks);
+ SmallVectorImpl<RangeCheck> &CombinedChecks) const;
/// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
/// computing only one of the two expressions?
@@ -266,8 +271,16 @@ class GuardWideningImpl {
void widenGuard(Instruction *ToWiden, Value *NewCondition,
bool InvertCondition) {
Value *Result;
- widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result,
+ widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
InvertCondition);
+ Value *WidenableCondition = nullptr;
+ if (isGuardAsWidenableBranch(ToWiden)) {
+ auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
+ WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+ }
+ if (WidenableCondition)
+ Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
+ "guard.chk", ToWiden);
setCondition(ToWiden, Result);
}
@@ -285,6 +298,14 @@ public:
};
}
+static bool isSupportedGuardInstruction(const Instruction *Insn) {
+ if (isGuard(Insn))
+ return true;
+ if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
+ return true;
+ return false;
+}
+
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
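
The guard-widening changes above treat a branch on and(%cond, %widenable_cond) as a guard: getCondition returns operand 0 of the and, and widenGuard re-ands its widened result with the untouched widenable condition so the deopt path stays reachable. A boolean toy model of that shape; no LLVM IR is involved and the struct and member names are illustrative.

    #include <cassert>

    // Toy model of "br (and %cond, %widenable_cond), %guarded, %deopt".
    struct WidenableBranchGuard {
      bool Cond;          // the explicit guard condition (operand 0 of the and)
      bool WidenableCond; // the widenable condition (operand 1), left untouched

      bool branchCondition() const { return Cond && WidenableCond; }

      // Widening folds an extra check into the guard condition and keeps the
      // AND with the widenable condition, mirroring the "guard.chk" AND above.
      void widen(bool NewCondition) { Cond = Cond && NewCondition; }
    };

    int main() {
      WidenableBranchGuard G{true, true};
      assert(G.branchCondition());  // guard passes
      G.widen(false);               // the widened-in check fails
      assert(!G.branchCondition()); // now the branch takes the deopt successor
      return 0;
    }
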
@@ -304,20 +325,20 @@ bool GuardWideningImpl::run() {
auto &CurrentList = GuardsInBlock[BB];
for (auto &I : *BB)
- if (isGuard(&I))
+ if (isSupportedGuardInstruction(&I))
CurrentList.push_back(cast<Instruction>(&I));
for (auto *II : CurrentList)
- Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+ Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
if (WidenFrequentBranches && BPI)
if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
if (BI->isConditional()) {
// If one of branches of a conditional is likely taken, try to
// eliminate it.
if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
- Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock);
+ Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
- Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock,
+ Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
/*InvertCondition*/true);
}
}
@@ -326,7 +347,7 @@ bool GuardWideningImpl::run() {
for (auto *I : EliminatedGuardsAndBranches)
if (!WidenedGuards.count(I)) {
assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
- if (isGuard(I))
+ if (isSupportedGuardInstruction(I))
eliminateGuard(I);
else {
assert(isa<BranchInst>(I) &&
@@ -338,19 +359,18 @@ bool GuardWideningImpl::run() {
return Changed;
}
-bool GuardWideningImpl::eliminateGuardViaWidening(
- Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+bool GuardWideningImpl::eliminateInstrViaWidening(
+ Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsInBlock, bool InvertCondition) {
// Ignore trivial true or false conditions. These instructions will be
// trivially eliminated by any cleanup pass. Do not erase them because other
// guards can possibly be widened into them.
- if (isa<ConstantInt>(getCondition(GuardInst)))
+ if (isa<ConstantInt>(getCondition(Instr)))
return false;
Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
- auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
// In the set of dominating guards, find the one we can merge GuardInst with
// for the most profit.
@@ -358,12 +378,13 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
auto *CurBB = DFSI.getPath(i)->getBlock();
if (!BlockFilter(CurBB))
break;
- auto *CurLoop = LI.getLoopFor(CurBB);
assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
auto I = GuardsInCurBB.begin();
- auto E = GuardsInCurBB.end();
+ auto E = Instr->getParent() == CurBB
+ ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr)
+ : GuardsInCurBB.end();
#ifndef NDEBUG
{
@@ -379,21 +400,11 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
}
#endif
- assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
-
- if (i == (e - 1) && CurBB->getTerminator() != GuardInst) {
- // Corner case: make sure we're only looking at guards strictly dominating
- // GuardInst when visiting GuardInst->getParent().
- auto NewEnd = std::find(I, E, GuardInst);
- assert(NewEnd != E && "GuardInst not in its own block?");
- E = NewEnd;
- }
+ assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");
for (auto *Candidate : make_range(I, E)) {
- auto Score =
- computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop,
- InvertCondition);
- LLVM_DEBUG(dbgs() << "Score between " << *getCondition(GuardInst)
+ auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
+ LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
<< " and " << *getCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
@@ -404,42 +415,45 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
}
if (BestScoreSoFar == WS_IllegalOrNegative) {
- LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *GuardInst << "\n");
+ LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
return false;
}
- assert(BestSoFar != GuardInst && "Should have never visited same guard!");
- assert(DT.dominates(BestSoFar, GuardInst) && "Should be!");
+ assert(BestSoFar != Instr && "Should have never visited same guard!");
+ assert(DT.dominates(BestSoFar, Instr) && "Should be!");
- LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
+ LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, getCondition(GuardInst), InvertCondition);
+ widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
auto NewGuardCondition = InvertCondition
- ? ConstantInt::getFalse(GuardInst->getContext())
- : ConstantInt::getTrue(GuardInst->getContext());
- setCondition(GuardInst, NewGuardCondition);
- EliminatedGuardsAndBranches.push_back(GuardInst);
+ ? ConstantInt::getFalse(Instr->getContext())
+ : ConstantInt::getTrue(Instr->getContext());
+ setCondition(Instr, NewGuardCondition);
+ EliminatedGuardsAndBranches.push_back(Instr);
WidenedGuards.insert(BestSoFar);
return true;
}
-GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
- Instruction *DominatedGuard, Loop *DominatedGuardLoop,
- Instruction *DominatingGuard, Loop *DominatingGuardLoop, bool InvertCond) {
+GuardWideningImpl::WideningScore
+GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
+ Instruction *DominatingGuard,
+ bool InvertCond) {
+ Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
+ Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
bool HoistingOutOfLoop = false;
- if (DominatingGuardLoop != DominatedGuardLoop) {
+ if (DominatingGuardLoop != DominatedInstrLoop) {
// Be conservative and don't widen into a sibling loop. TODO: If the
// sibling is colder, we should consider allowing this.
if (DominatingGuardLoop &&
- !DominatingGuardLoop->contains(DominatedGuardLoop))
+ !DominatingGuardLoop->contains(DominatedInstrLoop))
return WS_IllegalOrNegative;
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(getCondition(DominatedGuard), DominatingGuard))
+ if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -450,7 +464,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
// is essentially just another spelling for control flow.
- if (isWideningCondProfitable(getCondition(DominatedGuard),
+ if (isWideningCondProfitable(getCondition(DominatedInstr),
getCondition(DominatingGuard), InvertCond))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
@@ -462,7 +476,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// throw, etc...). That choice appears arbitrary.
auto MaybeHoistingOutOfIf = [&]() {
auto *DominatingBlock = DominatingGuard->getParent();
- auto *DominatedBlock = DominatedGuard->getParent();
+ auto *DominatedBlock = DominatedInstr->getParent();
+ if (isGuardAsWidenableBranch(DominatingGuard))
+ DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
// Same Block?
if (DominatedBlock == DominatingBlock)
@@ -478,8 +494,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}
-bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
- SmallPtrSetImpl<Instruction *> &Visited) {
+bool GuardWideningImpl::isAvailableAt(
+ const Value *V, const Instruction *Loc,
+ SmallPtrSetImpl<const Instruction *> &Visited) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
return true;
@@ -499,7 +516,7 @@ bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
[&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}
-void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
+void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc))
return;
@@ -597,7 +614,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
bool GuardWideningImpl::parseRangeChecks(
Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
- SmallPtrSetImpl<Value *> &Visited) {
+ SmallPtrSetImpl<const Value *> &Visited) {
if (!Visited.insert(CheckCond).second)
return true;
@@ -616,7 +633,7 @@ bool GuardWideningImpl::parseRangeChecks(
IC->getPredicate() != ICmpInst::ICMP_UGT))
return false;
- Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
+ const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
if (IC->getPredicate() == ICmpInst::ICMP_UGT)
std::swap(CmpLHS, CmpRHS);
@@ -669,13 +686,13 @@ bool GuardWideningImpl::parseRangeChecks(
bool GuardWideningImpl::combineRangeChecks(
SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
- SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) {
+ SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
unsigned OldCount = Checks.size();
while (!Checks.empty()) {
// Pick all of the range checks with a specific base and length, and try to
// merge them.
- Value *CurrentBase = Checks.front().getBase();
- Value *CurrentLength = Checks.front().getLength();
+ const Value *CurrentBase = Checks.front().getBase();
+ const Value *CurrentLength = Checks.front().getLength();
SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;
@@ -704,8 +721,8 @@ bool GuardWideningImpl::combineRangeChecks(
// Note: std::sort should not invalidate the ChecksStart iterator.
- ConstantInt *MinOffset = CurrentChecks.front().getOffset(),
- *MaxOffset = CurrentChecks.back().getOffset();
+ const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
+ const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();
unsigned BitWidth = MaxOffset->getValue().getBitWidth();
if ((MaxOffset->getValue() - MinOffset->getValue())
@@ -800,6 +817,31 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
return PA;
}
+PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function &F = *L.getHeader()->getParent();
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);
+
+ BasicBlock *RootBB = L.getLoopPredecessor();
+ if (!RootBB)
+ RootBB = L.getHeader();
+ auto BlockFilter = [&](BasicBlock *BB) {
+ return BB == RootBB || L.contains(BB);
+ };
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI,
+ AR.DT.getNode(RootBB),
+ BlockFilter).run())
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
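The new loop-pass entry point above widens guards only within the current loop plus a single root block (the loop predecessor when one exists, otherwise the header), which is what the BlockFilter lambda encodes. Below is a minimal standalone sketch of that filtering rule on a toy loop representation; Block, ToyLoop, and inWideningScope are illustrative stand-ins, not LLVM types.

#include <cassert>
#include <set>
#include <string>

// Hypothetical stand-ins for illustration only; not LLVM types.
struct Block { std::string Name; };

struct ToyLoop {
  std::set<const Block *> Body;        // blocks inside the loop
  const Block *Predecessor;            // unique predecessor, if any
  const Block *Header;
  bool contains(const Block *B) const { return Body.count(B) != 0; }
};

// Mirrors the BlockFilter lambda: widening may only touch the loop body
// plus one root block (the predecessor if it exists, else the header).
static bool inWideningScope(const ToyLoop &L, const Block *BB) {
  const Block *Root = L.Predecessor ? L.Predecessor : L.Header;
  return BB == Root || L.contains(BB);
}

int main() {
  Block Pre{"preheader"}, H{"header"}, Latch{"latch"}, Exit{"exit"};
  ToyLoop L{{&H, &Latch}, &Pre, &H};
  assert(inWideningScope(L, &Pre));    // root block is allowed
  assert(inWideningScope(L, &Latch));  // loop blocks are allowed
  assert(!inWideningScope(L, &Exit));  // blocks outside are filtered out
}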
namespace {
struct GuardWideningLegacyPass : public FunctionPass {
static char ID;
diff --git a/lib/Transforms/Scalar/IVUsersPrinter.cpp b/lib/Transforms/Scalar/IVUsersPrinter.cpp
index 807593379283..e2022aba97c4 100644
--- a/lib/Transforms/Scalar/IVUsersPrinter.cpp
+++ b/lib/Transforms/Scalar/IVUsersPrinter.cpp
@@ -1,9 +1,8 @@
//===- IVUsersPrinter.cpp - Induction Variable Users Printer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 48d8e457ba7c..f9fc698a4a9b 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1,9 +1,8 @@
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +43,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -101,7 +102,7 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
-enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
@@ -109,6 +110,8 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
clEnumValN(OnlyCheapRepl, "cheap",
"only replace exit value when the cost is cheap"),
+ clEnumValN(NoHardUse, "noharduse",
+ "only replace exit values when loop def likely dead"),
clEnumValN(AlwaysRepl, "always",
"always replace exit value whenever possible")));
@@ -141,13 +144,15 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ bool optimizeLoopExits(Loop *L);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
bool rewriteFirstIterationLoopExitValues(Loop *L);
bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
- bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
bool sinkUnusedInvariants(Loop *L);
@@ -218,7 +223,9 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
-/// common dominator for the incoming blocks.
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
DominatorTree *DT, LoopInfo *LI) {
PHINode *PHI = dyn_cast<PHINode>(User);
@@ -231,6 +238,10 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
continue;
BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+ if (!DT->isReachableFromEntry(InsertBB))
+ continue;
+
if (!InsertPt) {
InsertPt = InsertBB->getTerminator();
continue;
@@ -238,7 +249,11 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
InsertPt = InsertBB->getTerminator();
}
- assert(InsertPt && "Missing phi operand");
+
+ // If we have skipped all inputs, it means that Def only comes to Phi from
+ // unreachable blocks.
+ if (!InsertPt)
+ return nullptr;
auto *DefI = dyn_cast<Instruction>(Def);
if (!DefI)
@@ -621,8 +636,12 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Computing the value outside of the loop brings no benefit if it is
// definitely used inside the loop in a way which can not be optimized
- // away.
- if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ // away. Avoid doing so unless we know we have a value which computes
+ // the ExitValue already. TODO: This should be merged into SCEV
+ // expander to leverage its knowledge of existing expressions.
+ if (ReplaceExitValue != AlwaysRepl &&
+ !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
+ hasHardUserWithinLoop(L, Inst))
continue;
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
@@ -707,8 +726,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
- auto *LoopHeader = L->getHeader();
- assert(LoopHeader && "Invalid loop");
bool MadeAnyChanges = false;
for (auto *ExitBB : ExitBlocks) {
@@ -719,11 +736,13 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
IncomingValIdx != E; ++IncomingValIdx) {
auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
- // We currently only support loop exits from loop header. If the
- // incoming block is not loop header, we need to recursively check
- // all conditions starting from loop header are loop invariants.
- // Additional support might be added in the future.
- if (IncomingBB != LoopHeader)
+ // Can we prove that the exit must run on the first iteration if it
+ // runs at all? (i.e. early exits are fine for our purposes, but
+ // traces which lead to this exit being taken on the 2nd iteration
+ // aren't.) Note that this is about whether the exit branch is
+ // executed, not about whether it is taken.
+ if (!L->getLoopLatch() ||
+ !DT->dominates(IncomingBB, L->getLoopLatch()))
continue;
// Get condition that leads to the exit path.
@@ -744,8 +763,8 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
- // Only deal with PHIs.
- if (!ExitVal)
+ // Only deal with PHIs in the loop header.
+ if (!ExitVal || ExitVal->getParent() != L->getHeader())
continue;
// If ExitVal is a PHI on the loop header, then we know its
@@ -755,7 +774,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
assert(LoopPreheader && "Invalid loop");
int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
if (PreheaderIdx != -1) {
- assert(ExitVal->getParent() == LoopHeader &&
+ assert(ExitVal->getParent() == L->getHeader() &&
"ExitVal must be in loop header");
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
@@ -1022,24 +1041,13 @@ protected:
} // end anonymous namespace
-/// Perform a quick domtree based check for loop invariance assuming that V is
-/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
-/// purpose.
-static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
- Instruction *Inst = dyn_cast<Instruction>(V);
- if (!Inst)
- return true;
-
- return DT->properlyDominates(Inst->getParent(), L->getHeader());
-}
-
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
bool IsSigned, Instruction *Use) {
// Set the debug location and conservative insertion point.
IRBuilder<> Builder(Use);
// Hoist the insertion point into loop preheaders as far as possible.
for (const Loop *L = LI->getLoopFor(Use->getParent());
- L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
L = L->getParentLoop())
Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
@@ -1305,13 +1313,15 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
return {AddRec, ExtKind};
}
-/// This IV user cannot be widen. Replace this use of the original narrow IV
+/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return;
LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
<< *DU.NarrowUse << "\n");
- IRBuilder<> Builder(
- getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+ IRBuilder<> Builder(InsertPt);
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
@@ -1348,8 +1358,10 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- IRBuilder<> Builder(
- getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return false;
+ IRBuilder<> Builder(InsertPt);
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
@@ -1977,41 +1989,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Return true if this loop's backedge taken count expression can be safely and
-/// cheaply expanded into an instruction sequence that can be used by
-/// linearFunctionTestReplace.
-///
-/// TODO: This fails for pointer-type loop counters with greater than one byte
-/// strides, consequently preventing LFTR from running. For the purpose of LFTR
-/// we could skip this check in the case that the LFTR loop counter (chosen by
-/// FindLoopCounter) is also pointer type. Instead, we could directly convert
-/// the loop test to an inequality test by checking the target data's alignment
-/// of element types (given that the initial pointer value originates from or is
-/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
-/// However, we don't yet have a strong motivation for converting loop tests
-/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
- SCEVExpander &Rewriter) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
-
- if (!L->getExitingBlock())
- return false;
-
- // Can't rewrite non-branch yet.
- if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
- return false;
-
- if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
- return false;
-
- return true;
-}
-
-/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
-static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+/// Given a Value which is hoped to be part of an add recurrence in the given
+/// loop, return the associated Phi node if so. Otherwise, return null. Note
+/// that this is less general than SCEV's AddRec checking.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
return nullptr;
@@ -2031,7 +2012,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
- if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ if (L->isLoopInvariant(IncI->getOperand(1)))
return Phi;
return nullptr;
}
@@ -2041,32 +2022,40 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
if (Phi && Phi->getParent() == L->getHeader()) {
- if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ if (L->isLoopInvariant(IncI->getOperand(0)))
return Phi;
}
return nullptr;
}
-/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
-static ICmpInst *getLoopTest(Loop *L) {
- assert(L->getExitingBlock() && "expected loop exit");
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- // Don't bother with LFTR if the loop is not properly simplified.
- if (!LatchBlock)
- return nullptr;
-
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- assert(BI && "expected exit branch");
+/// Whether the current loop exit test is based on this value. Currently this
+/// is limited to a direct use in the loop condition.
+static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ // TODO: Allow non-icmp loop test.
+ if (!ICmp)
+ return false;
- return dyn_cast<ICmpInst>(BI->getCondition());
+ // TODO: Allow indirect use.
+ return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}
/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L, DominatorTree *DT) {
+static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
+ assert(L->getLoopLatch() && "Must be in simplified form");
+
+ // Avoid converting a constant or loop invariant test back to a runtime
+ // test. This is critical for when SCEV's cached ExitCount is less precise
+ // than the current IR (such as after we've proven a particular exit is
+ // actually dead and thus the BE count never reaches our ExitCount.)
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (L->isLoopInvariant(BI->getCondition()))
+ return false;
+
// Do LFTR to simplify the exit condition to an ICMP.
- ICmpInst *Cond = getLoopTest(L);
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return true;
@@ -2078,15 +2067,15 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Look for a loop invariant RHS
Value *LHS = Cond->getOperand(0);
Value *RHS = Cond->getOperand(1);
- if (!isLoopInvariant(RHS, L, DT)) {
- if (!isLoopInvariant(LHS, L, DT))
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
return true;
std::swap(LHS, RHS);
}
// Look for a simple IV counter LHS
PHINode *Phi = dyn_cast<PHINode>(LHS);
if (!Phi)
- Phi = getLoopPhiForCounter(LHS, L, DT);
+ Phi = getLoopPhiForCounter(LHS, L);
if (!Phi)
return true;
@@ -2098,7 +2087,49 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR if the exit condition's IV is *not* a simple counter.
Value *IncV = Phi->getIncomingValue(Idx);
- return Phi != getLoopPhiForCounter(IncV, L, DT);
+ return Phi != getLoopPhiForCounter(IncV, L);
+}
+
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result. Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison. This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist. Note that a false result conveys no information.
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+ Instruction *OnPathTo,
+ DominatorTree *DT) {
+ // Basic approach is to assume Root is poison, propagate poison forward
+ // through all users we can easily track, and then check whether any of those
+ // users are provable UB and must execute before out exiting block might
+ // exit.
+
+ // The set of all recursive users we've visited (which are assumed to all be
+ // poison because of said visit)
+ SmallSet<const Value *, 16> KnownPoison;
+ SmallVector<const Instruction*, 16> Worklist;
+ Worklist.push_back(Root);
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+
+    // If we know this must trigger UB on a path leading to our target.
+ if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+ return true;
+
+ // If we can't analyze propagation through this instruction, just skip it
+ // and transitive users. Safe as false is a conservative result.
+ if (!propagatesFullPoison(I) && I != Root)
+ continue;
+
+ if (KnownPoison.insert(I).second)
+ for (const User *User : I->users())
+ Worklist.push_back(cast<Instruction>(User));
+ }
+
+  // Might be non-UB, or might have a path we couldn't prove must execute on
+  // the way to the exiting bb.
+ return false;
}
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
@@ -2157,46 +2188,62 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
return true;
}
-/// Find an affine IV in canonical form.
+/// Return true if the given phi is a "counter" in L. A counter is an
+/// add recurrence (of integer or pointer type) with an arbitrary start, and a
+/// step of 1. Note that L must have exactly one latch.
+static bool isLoopCounter(PHINode* Phi, Loop *L,
+ ScalarEvolution *SE) {
+ assert(Phi->getParent() == L->getHeader());
+ assert(L->getLoopLatch());
+
+ if (!SE->isSCEVable(Phi->getType()))
+ return false;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ return false;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ return false;
+
+ int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ return (getLoopPhiForCounter(IncV, L) == Phi);
+}
+
+/// Search the loop header for a loop counter (an add rec w/step of one)
+/// suitable for use by LFTR.  If multiple counters are available, select the
+/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
-///
-/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
-///
-/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
-///
-/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
-/// This is difficult in general for SCEV because of potential overflow. But we
-/// could at least handle constant BECounts.
-static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
- Value *Cond =
- cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+ Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
PHINode *BestPhi = nullptr;
const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
- assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+ assert(LatchBlock && "Must be in simplified form");
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
- if (!SE->isSCEVable(Phi->getType()))
+ if (!isLoopCounter(Phi, L, SE))
continue;
// Avoid comparing an integer IV against a pointer Limit.
if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
continue;
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
- if (!AR || AR->getLoop() != L || !AR->isAffine())
- continue;
-
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+
// AR may be a pointer type, while BECount is an integer type.
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
@@ -2204,28 +2251,30 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
continue;
- const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
- if (!Step || !Step->isOne())
- continue;
-
- int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
- Value *IncV = Phi->getIncomingValue(LatchIdx);
- if (getLoopPhiForCounter(IncV, L, DT) != Phi)
- continue;
-
// Avoid reusing a potentially undef value to compute other values that may
// have originally had a concrete definition.
if (!hasConcreteDef(Phi)) {
// We explicitly allow unknown phis as long as they are already used by
- // the loop test. In this case we assume that performing LFTR could not
- // increase the number of undef users.
- if (ICmpInst *Cond = getLoopTest(L)) {
- if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
- Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
- continue;
- }
- }
+ // the loop exit test. This is legal since performing LFTR could not
+ // increase the number of undef users.
+ Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
+ if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
+ !isLoopExitTestBasedOn(IncPhi, ExitingBB))
+ continue;
}
+
+ // Avoid introducing undefined behavior due to poison which didn't exist in
+ // the original program. (Annoyingly, the rules for poison and undef
+ // propagation are distinct, so this does NOT cover the undef case above.)
+ // We have to ensure that we don't introduce UB by introducing a use on an
+ // iteration where said IV produces poison. Our strategy here differs for
+ // pointers and integer IVs. For integers, we strip and reinfer as needed,
+ // see code in linearFunctionTestReplace. For pointers, we restrict
+ // transforms as there is no good way to reinfer inbounds once lost.
+ if (!Phi->getType()->isIntegerTy() &&
+ !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
+ continue;
+
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2251,47 +2300,49 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
-/// Help linearFunctionTestReplace by generating a value that holds the RHS of
-/// the new loop test.
-static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+/// Insert an IR expression which computes the value held by the IV IndVar
+/// (which must be a loop counter w/unit stride) after the backedge of loop L
+/// is taken ExitCount times.
+static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
+ const SCEV *ExitCount, bool UsePostInc, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ assert(isLoopCounter(IndVar, L, SE));
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
const SCEV *IVInit = AR->getStart();
- // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
- // finds a valid pointer IV. Sign extend BECount in order to materialize a
+ // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
+ // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
// GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
// the existing GEPs whenever possible.
- if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
+ if (IndVar->getType()->isPointerTy() &&
+ !ExitCount->getType()->isPointerTy()) {
// IVOffset will be the new GEP offset that is interpreted by GEP as a
- // signed value. IVCount on the other hand represents the loop trip count,
+ // signed value. ExitCount on the other hand represents the loop trip count,
// which is an unsigned value. FindLoopCounter only allows induction
// variables that have a positive unit stride of one. This means we don't
// have to handle the case of negative offsets (yet) and just need to zero
- // extend IVCount.
+ // extend ExitCount.
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
+ const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
+ if (UsePostInc)
+ IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVOffset, L) &&
"Computed iteration count is not loop invariant!");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
- Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
- Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
- assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
// We could handle pointer IVs other than i8*, but we need to compensate for
- // gep index scaling. See canExpandBackedgeTakenCount comments.
+ // gep index scaling.
assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
- cast<PointerType>(GEPBase->getType())
+ cast<PointerType>(IndVar->getType())
->getElementType())->isOne() &&
"unit stride pointer IV must be i8*");
- IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
+ const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
} else {
- // In any other case, convert both IVInit and IVCount to integers before
+ // In any other case, convert both IVInit and ExitCount to integers before
// comparing. This may result in SCEV expansion of pointers, but in practice
// SCEV will fold the pointer arithmetic away as such:
// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
@@ -2299,35 +2350,40 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// Valid Cases: (1) both integers is most common; (2) both may be pointers
// for simple memset-style loops.
//
- // IVInit integer and IVCount pointer would only occur if a canonical IV
+ // IVInit integer and ExitCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = nullptr;
- // For unit stride, IVCount = Start + BECount with 2's complement overflow.
- // For non-zero Start, compute IVCount here.
- if (AR->getStart()->isZero())
- IVLimit = IVCount;
- else {
- assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
- const SCEV *IVInit = AR->getStart();
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ // For unit stride, IVCount = Start + ExitCount with 2's complement
+ // overflow.
+
+ // For integer IVs, truncate the IV before computing IVInit + BECount,
+  // unless we know a priori that the limit must be a constant when evaluated
+ // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
+ // of the IV in the loop over a (potentially) expensive expansion of the
+ // widened exit count add(zext(add)) expression.
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(ExitCount->getType())) {
+ if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
+ else
+ IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
+ }
- // For integer IVs, truncate the IV before computing IVInit + BECount.
- if (SE->getTypeSizeInBits(IVInit->getType())
- > SE->getTypeSizeInBits(IVCount->getType()))
- IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+ const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
+
+ if (UsePostInc)
+ IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
- IVLimit = SE->getAddExpr(IVInit, IVCount);
- }
// Expand the code for the iteration count.
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
- IRBuilder<> Builder(BI);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
// Ensure that we generate the same type as IndVar, or a smaller integer
// type. In the presence of null pointer values, we have an integer type
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
- Type *LimitTy = IVCount->getType()->isPointerTy() ?
- IndVar->getType() : IVCount->getType();
+ Type *LimitTy = ExitCount->getType()->isPointerTy() ?
+ IndVar->getType() : ExitCount->getType();
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
}
}
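Stripped of the SCEV machinery, the limit genLoopLimit materializes for a unit-stride counter is just Start + ExitCount, plus one more when the post-incremented value is compared. A small sketch of that arithmetic on plain unsigned integers; the names are illustrative, and 2's-complement wrap is acceptable because the rewritten exit test uses eq/ne.

#include <cassert>
#include <cstdint>

// Value a unit-stride counter holds when the loop exit test fires:
//   pre-inc  limit = Start + ExitCount
//   post-inc limit = Start + ExitCount + 1
static uint64_t loopTestLimit(uint64_t Start, uint64_t ExitCount,
                              bool UsePostInc) {
  uint64_t Limit = Start + ExitCount;
  if (UsePostInc)
    Limit += 1;
  return Limit;
}

int main() {
  // Counter starts at 10 and the backedge is taken 5 times before exiting.
  assert(loopTestLimit(10, 5, /*UsePostInc=*/false) == 15); // compare i
  assert(loopTestLimit(10, 5, /*UsePostInc=*/true) == 16);  // compare i+1
}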
@@ -2338,51 +2394,70 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
-linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
+ assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+ assert(isLoopCounter(IndVar, L, SE));
+ Instruction * const IncVar =
+ cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
- // Initialize CmpIndVar and IVCount to their preincremented values.
+ // Initialize CmpIndVar to the preincremented IV.
Value *CmpIndVar = IndVar;
- const SCEV *IVCount = BackedgeTakenCount;
-
- assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+ bool UsePostInc = false;
// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
- if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getOne(BackedgeTakenCount->getType()));
- // The BackedgeTaken expression contains the number of times that the
- // backedge branches to the loop header. This is one less than the
- // number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ if (ExitingBB == L->getLoopLatch()) {
+ // For pointer IVs, we chose to not strip inbounds which requires us not
+ // to add a potentially UB introducing use. We need to either a) show
+ // the loop test we're modifying is already in post-inc form, or b) show
+ // that adding a use must not introduce UB.
+ bool SafeToPostInc =
+ IndVar->getType()->isIntegerTy() ||
+ isLoopExitTestBasedOn(IncVar, ExitingBB) ||
+ mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
+ if (SafeToPostInc) {
+ UsePostInc = true;
+ CmpIndVar = IncVar;
+ }
}
- Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ // It may be necessary to drop nowrap flags on the incrementing instruction
+ // if either LFTR moves from a pre-inc check to a post-inc check (in which
+ // case the increment might have previously been poison on the last iteration
+ // only) or if LFTR switches to a different IV that was previously dynamically
+ // dead (and as such may be arbitrarily poison). We remove any nowrap flags
+ // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
+ // check), because the pre-inc addrec flags may be adopted from the original
+ // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
+ // TODO: This handling is inaccurate for one case: If we switch to a
+ // dynamically dead IV that wraps on the first loop iteration only, which is
+ // not covered by the post-inc addrec. (If the new IV was not dynamically
+ // dead, it could not be poison on the first iteration in the first place.)
+ if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
+ if (BO->hasNoUnsignedWrap())
+ BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
+ if (BO->hasNoSignedWrap())
+ BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
+ }
+
+ Value *ExitCnt = genLoopLimit(
+ IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
assert(ExitCnt->getType()->isPointerTy() ==
IndVar->getType()->isPointerTy() &&
"genLoopLimit missed a cast");
// Insert a new icmp_ne or icmp_eq instruction before the branch.
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
else
P = ICmpInst::ICMP_EQ;
- LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar << '\n'
- << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
- << "\n"
- << " RHS:\t" << *ExitCnt << "\n"
- << " IVCount:\t" << *IVCount << "\n");
-
IRBuilder<> Builder(BI);
// The new loop exit condition should reuse the debug location of the
@@ -2390,67 +2465,58 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
- // LFTR can ignore IV overflow and truncate to the width of
- // BECount. This avoids materializing the add(zext(add)) expression.
+ // For integer IVs, if we evaluated the limit in the narrower bitwidth to
+ // avoid the expensive expansion of the limit expression in the wider type,
+ // emit a truncate to narrow the IV to the ExitCount type. This is safe
+ // since we know (from the exit count bitwidth), that we can't self-wrap in
+ // the narrower type.
unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
if (CmpIndVarSize > ExitCntSize) {
- const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- const SCEV *ARStart = AR->getStart();
- const SCEV *ARStep = AR->getStepRecurrence(*SE);
- // For constant IVCount, avoid truncation.
- if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
- const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
- APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
- // Note that the post-inc value of BackedgeTakenCount may have overflowed
- // above such that IVCount is now zero.
- if (IVCount != BackedgeTakenCount && Count == 0) {
- Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
- ++Count;
- }
- else
- Count = Count.zext(CmpIndVarSize);
- APInt NewLimit;
- if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
- NewLimit = Start - Count;
- else
- NewLimit = Start + Count;
- ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
-
- LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
+ assert(!CmpIndVar->getType()->isPointerTy() &&
+ !ExitCnt->getType()->isPointerTy());
+
+ // Before resorting to actually inserting the truncate, use the same
+ // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
+ // the other side of the comparison instead. We still evaluate the limit
+ // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
+    // a truncate within it.
+ bool Extended = false;
+ const SCEV *IV = SE->getSCEV(CmpIndVar);
+ const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType());
+ const SCEV *ZExtTrunc =
+ SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
+
+ if (ZExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
} else {
- // We try to extend trip count first. If that doesn't work we truncate IV.
- // Zext(trunc(IV)) == IV implies equivalence of the following two:
- // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
- // one of the two holds, extend the trip count, otherwise we truncate IV.
- bool Extended = false;
- const SCEV *IV = SE->getSCEV(CmpIndVar);
- const SCEV *ZExtTrunc =
- SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
- ExitCnt->getType()),
- CmpIndVar->getType());
-
- if (ZExtTrunc == IV) {
+ const SCEV *SExtTrunc =
+ SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
+ if (SExtTrunc == IV) {
Extended = true;
- ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
"wide.trip.count");
- } else {
- const SCEV *SExtTrunc =
- SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
- ExitCnt->getType()),
- CmpIndVar->getType());
- if (SExtTrunc == IV) {
- Extended = true;
- ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
- "wide.trip.count");
- }
}
-
- if (!Extended)
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
- "lftr.wideiv");
}
+
+ if (Extended) {
+ bool Discard;
+ L->makeLoopInvariant(ExitCnt, Discard);
+ } else
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
}
+ LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
+ << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << "ExitCount:\t" << *ExitCount << "\n"
+ << " was: " << *BI->getCondition() << "\n");
+
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -2558,6 +2624,111 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
+bool IndVarSimplify::optimizeLoopExits(Loop *L) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Form an expression for the maximum exit count possible for this loop. We
+ // merge the max and exact information to approximate a version of
+ // getMaxBackedgeTakenInfo which isn't restricted to just constants.
+ // TODO: factor this out as a version of getMaxBackedgeTakenCount which
+ // isn't guaranteed to return a constant.
+ SmallVector<const SCEV*, 4> ExitCounts;
+ const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxConstEC))
+ ExitCounts.push_back(MaxConstEC);
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (!isa<SCEVCouldNotCompute>(ExitCount)) {
+ assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
+ "We should only have known counts for exiting blocks that "
+ "dominate latch!");
+ ExitCounts.push_back(ExitCount);
+ }
+ }
+ if (ExitCounts.empty())
+ return false;
+ const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
+
+ bool Changed = false;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+    // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+
+ // If we know we'd exit on the first iteration, rewrite the exit to
+ // reflect this. This does not imply the loop must exit through this
+ // exit; there may be an earlier one taken on the first iteration.
+ // TODO: Given we know the backedge can't be taken, we should go ahead
+ // and break it. Or at least, kill all the header phis and simplify.
+ if (ExitCount->isZero()) {
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
+ ConstantInt::getFalse(OldCond->getType());
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ continue;
+ }
+
+ // If we end up with a pointer exit count, bail.
+ if (!ExitCount->getType()->isIntegerTy() ||
+ !MaxExitCount->getType()->isIntegerTy())
+ return false;
+
+ Type *WiderType =
+ SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
+ ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
+ MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
+ assert(MaxExitCount->getType() == ExitCount->getType());
+
+ // Can we prove that some other exit must be taken strictly before this
+ // one? TODO: handle cases where ule is known, and equality is covered
+ // by a dominating exit
+ if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
+ MaxExitCount, ExitCount)) {
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
+ ConstantInt::getTrue(OldCond->getType());
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ continue;
+ }
+
+ // TODO: If we can prove that the exiting iteration is equal to the exit
+    // count for this exit and that no previous exit opportunities exist within
+ // the loop, then we can discharge all other exits. (May fall out of
+ // previous TODO.)
+
+ // TODO: If we can't prove any relation between our exit count and the
+    // loop's exit count, but taking this exit doesn't require actually running
+ // the loop (i.e. no side effects, no computed values used in exit), then
+ // we can replace the exit test with a loop invariant test which exits on
+ // the first iteration.
+ }
+ return Changed;
+}
+
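The heart of optimizeLoopExits is a comparison between one exit's count and the tightest bound on how many backedges can be taken (the umin over all computable exit counts): a zero count means the exit is taken on entry, and a count provably larger than the bound means the exit can never be taken. A simplified model with concrete integer counts is sketched below; the enum and function names are hypothetical, and there is no SCEV reasoning here, only the arithmetic.

#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

enum class ExitFold { None, NeverTaken, TakenOnFirstIteration };

// Given the exit counts we could compute (one per analyzable exit), decide
// how a particular exit's branch could be folded. This mirrors the two
// rewrites above: ExitCount == 0, and MaxExitCount < ExitCount.
static ExitFold classifyExit(const std::vector<uint64_t> &KnownExitCounts,
                             std::optional<uint64_t> ThisExitCount) {
  if (!ThisExitCount)
    return ExitFold::None;               // SCEVCouldNotCompute analogue
  if (*ThisExitCount == 0)
    return ExitFold::TakenOnFirstIteration;
  if (KnownExitCounts.empty())
    return ExitFold::None;
  // Upper bound on backedges taken: the smallest computable exit count.
  uint64_t MaxExitCount = *std::min_element(KnownExitCounts.begin(),
                                            KnownExitCounts.end());
  // Some other exit must fire strictly earlier, so this one is dead.
  if (MaxExitCount < *ThisExitCount)
    return ExitFold::NeverTaken;
  return ExitFold::None;
}

int main() {
  // Exit counts 3 and 7 are known; the exit with count 7 can never be taken.
  std::vector<uint64_t> Known{3, 7};
  return classifyExit(Known, 7) == ExitFold::NeverTaken ? 0 : 1;
}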
//===----------------------------------------------------------------------===//
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
@@ -2614,23 +2785,60 @@ bool IndVarSimplify::run(Loop *L) {
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ Changed |= optimizeLoopExits(L);
+
// If we have a trip count expression, rewrite the loop's exit condition
- // using it. We can currently only handle loops with a single exit.
- if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
- needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
- if (IndVar) {
+ // using it.
+ if (!DisableLFTR) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ // Can't rewrite non-branch yet.
+ if (!isa<BranchInst>(ExitingBB->getTerminator()))
+ continue;
+
+      // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ if (!needsLFTR(L, ExitingBB))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+
+ // This was handled above, but as we form SCEVs, we can sometimes refine
+ // existing ones; this allows exit counts to be folded to zero which
+ // weren't when optimizeLoopExits saw them. Arguably, we should iterate
+ // until stable to handle cases like this better.
+ if (ExitCount->isZero())
+ continue;
+
+ PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
+ if (!IndVar)
+ continue;
+
+ // Avoid high cost expansions. Note: This heuristic is questionable in
+ // that our definition of "high cost" is not exactly principled.
+ if (Rewriter.isHighCostExpansion(ExitCount, L))
+ continue;
+
// Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
- // pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops,
- // while LoopPassManager only forces the current loop to be simplified.
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead
+ // any pass that uses the SCEVExpander must do it. This does not work
+ // well for loop passes because SCEVExpander makes assumptions about
+ // all loops, while LoopPassManager only forces the current loop to be
+ // simplified.
//
// FIXME: SCEV expansion has no way to bail out, so the caller must
// explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Changed |= linearFunctionTestReplace(L, ExitingBB,
+ ExitCount, IndVar,
Rewriter);
}
}
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 1c701bbee185..997d68838152 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1,9 +1,8 @@
//===- InductiveRangeCheckElimination.cpp - -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -116,6 +115,11 @@ static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
+static cl::opt<bool> AllowNarrowLatchCondition(
+ "irce-allow-narrow-latch", cl::Hidden, cl::init(true),
+ cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
+ "with narrow latch condition."));
+
static const char *ClonedLoopTag = "irce.loop.clone";
#define DEBUG_TYPE "irce"
@@ -532,12 +536,6 @@ class LoopConstrainer {
Optional<const SCEV *> HighLimit;
};
- // A utility function that does a `replaceUsesOfWith' on the incoming block
- // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
- // incoming block list with `ReplaceBy'.
- static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
- BasicBlock *ReplaceBy);
-
// Compute a safe set of limits for the main loop to run in -- effectively the
// intersection of `Range' and the iteration space of the original loop.
// Return None if unable to compute the set of subranges.
@@ -639,13 +637,6 @@ public:
} // end anonymous namespace
-void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
- BasicBlock *ReplaceBy) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) == Block)
- PN->setIncomingBlock(i, ReplaceBy);
-}
-
/// Given a loop with a decreasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeDecreasingBound(const SCEV *Start,
@@ -868,7 +859,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
assert(!StepCI->isZero() && "Zero step?");
bool IsIncreasing = !StepCI->isNegative();
- bool IsSignedPredicate = ICmpInst::isSigned(Pred);
+ bool IsSignedPredicate;
const SCEV *StartNext = IndVarBase->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
@@ -1045,11 +1036,23 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
return Result;
}
+/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
+/// signed or unsigned extension of \p S to type \p Ty.
+static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
+ bool Signed) {
+ return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
+}
+
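With narrow latch conditions now allowed, IRCE compares latch IV values against wider range-check bounds by extending them first, choosing sext or zext based on the signedness of the latch predicate; that is the rule NoopOrExtend (and the NoopOrExt lambda later in this patch) encode. A tiny sketch of that rule on fixed-width integers, purely illustrative:

#include <cassert>
#include <cstdint>

// Widen a 32-bit latch IV value to the 64-bit range-check type. The kind of
// extension must follow the signedness of the latch predicate, otherwise a
// negative start would be misread as a huge unsigned value.
static int64_t widenToRangeCheckType(int32_t NarrowIVValue,
                                     bool LatchIsSigned) {
  return LatchIsSigned
             ? static_cast<int64_t>(NarrowIVValue)                         // sext
             : static_cast<int64_t>(static_cast<uint32_t>(NarrowIVValue)); // zext
}

int main() {
  assert(widenToRangeCheckType(-1, /*LatchIsSigned=*/true) == -1);
  assert(widenToRangeCheckType(-1, /*LatchIsSigned=*/false) == 4294967295LL);
}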
Optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
- if (Range.getType() != Ty)
+ auto *RTy = cast<IntegerType>(Range.getType());
+
+ // We only support wide range checks and narrow latches.
+ if (!AllowNarrowLatchCondition && RTy != Ty)
+ return None;
+ if (RTy->getBitWidth() < Ty->getBitWidth())
return None;
LoopConstrainer::SubRanges Result;
@@ -1057,8 +1060,10 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
- const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
- const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+ const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
+ RTy, SE, IsSignedPredicate);
+ const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
+ SE, IsSignedPredicate);
bool Increasing = MainLoopStructure.IndVarIncreasing;
@@ -1068,7 +1073,7 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
- const SCEV *One = SE.getOne(Ty);
+ const SCEV *One = SE.getOne(RTy);
if (Increasing) {
Smallest = Start;
Greatest = End;
@@ -1257,6 +1262,13 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
bool IsSignedPredicate = LS.IsSignedPredicate;
IRBuilder<> B(PreheaderJump);
+ auto *RangeTy = Range.getBegin()->getType();
+ auto NoopOrExt = [&](Value *V) {
+ if (V->getType() == RangeTy)
+ return V;
+ return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+ : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+ };
// EnterLoopCond - is it okay to start executing this `LS'?
Value *EnterLoopCond = nullptr;
@@ -1264,15 +1276,16 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
Increasing
? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
: (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- EnterLoopCond = B.CreateICmp(Pred, LS.IndVarStart, ExitSubloopAt);
+ Value *IndVarStart = NoopOrExt(LS.IndVarStart);
+ EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
PreheaderJump->eraseFromParent();
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
B.SetInsertPoint(LS.LatchBr);
- Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, LS.IndVarBase,
- ExitSubloopAt);
+ Value *IndVarBase = NoopOrExt(LS.IndVarBase);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
Value *CondForBranch = LS.LatchBrExitIdx == 1
? TakeBackedgeLoopCond
@@ -1285,7 +1298,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// IterationsLeft - are there any more iterations left, given the original
// upper bound on the induction variable? If not, we branch to the "real"
// exit.
- Value *IterationsLeft = B.CreateICmp(Pred, LS.IndVarBase, LS.LoopExitAt);
+ Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
BranchInst *BranchToContinuation =
@@ -1304,15 +1318,14 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
}
- RRI.IndVarEnd = PHINode::Create(LS.IndVarBase->getType(), 2, "indvar.end",
+ RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
BranchToContinuation);
- RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
- RRI.IndVarEnd->addIncoming(LS.IndVarBase, RRI.ExitSelector);
+ RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
// The latch exit now has a branch from `RRI.ExitSelector' instead of
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
- for (PHINode &PN : LS.LatchExit->phis())
- replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector);
+ LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
return RRI;
}
@@ -1322,9 +1335,8 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
unsigned PHIIndex = 0;
for (PHINode &PN : LS.Header->phis())
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
- if (PN.getIncomingBlock(i) == ContinuationBlock)
- PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+ PN.setIncomingValueForBlock(ContinuationBlock,
+ RRI.PHIValuesAtPseudoExit[PHIIndex++]);
LS.IndVarStart = RRI.IndVarEnd;
}
@@ -1335,9 +1347,7 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
BranchInst::Create(LS.Header, Preheader);
- for (PHINode &PN : LS.Header->phis())
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
- replacePHIBlock(&PN, OldPreheader, Preheader);
+ LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
return Preheader;
}
@@ -1393,7 +1403,7 @@ bool LoopConstrainer::run() {
SubRanges SR = MaybeSR.getValue();
bool Increasing = MainLoopStructure.IndVarIncreasing;
IntegerType *IVTy =
- cast<IntegerType>(MainLoopStructure.IndVarBase->getType());
+ cast<IntegerType>(Range.getBegin()->getType());
SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
Instruction *InsertPt = OriginalPreheader->getTerminator();
@@ -1534,7 +1544,7 @@ bool LoopConstrainer::run() {
// This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
formLCSSARecursively(*L, DT, &LI, &SE);
- simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
// Pre/post loops are slow paths, we do not need to perform any loop
// optimizations on them.
if (!IsOriginalLoop)
@@ -1556,6 +1566,12 @@ Optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(
ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
bool IsLatchSigned) const {
+  // We can handle the case where the types of the latch check and the range
+  // checks don't match, as long as the latch check is the narrower one.
+ auto *IVType = cast<IntegerType>(IndVar->getType());
+ auto *RCType = cast<IntegerType>(getBegin()->getType());
+ if (IVType->getBitWidth() > RCType->getBitWidth())
+ return None;
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1579,8 +1595,9 @@ InductiveRangeCheck::computeSafeIterationSpace(
if (!IndVar->isAffine())
return None;
- const SCEV *A = IndVar->getStart();
- const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+ const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
+ const SCEVConstant *B = dyn_cast<SCEVConstant>(
+ NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
if (!B)
return None;
assert(!B->isZero() && "Recurrence with zero step?");
@@ -1591,7 +1608,7 @@ InductiveRangeCheck::computeSafeIterationSpace(
return None;
assert(!D->getValue()->isZero() && "Recurrence with zero step?");
- unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ unsigned BitWidth = RCType->getBitWidth();
const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
// Subtract Y from X so that it does not go through border of the IV
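Editorial note: the hunks above widen the induction variable's start and step to the range-check type through a NoopOrExtend helper whose definition is not part of this excerpt. A minimal sketch of what such a helper plausibly looks like, inferred from its call sites here (the actual definition elsewhere in InductiveRangeCheckElimination.cpp may differ):

static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
                                bool Signed) {
  // Widen S to Ty (a no-op when the widths already match), using the
  // extension kind that matches the signedness of the latch comparison.
  return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
}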
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index fbbc09eb487f..5f0e2001c73d 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1,9 +1,8 @@
//===- InferAddressSpace.cpp - --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,7 +148,9 @@ class InferAddressSpaces : public FunctionPass {
public:
static char ID;
- InferAddressSpaces() : FunctionPass(ID) {}
+ InferAddressSpaces() :
+ FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
+ InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -218,13 +219,17 @@ static bool isAddressExpression(const Value &V) {
if (!isa<Operator>(V))
return false;
- switch (cast<Operator>(V).getOpcode()) {
+ const Operator &Op = cast<Operator>(V);
+ switch (Op.getOpcode()) {
case Instruction::PHI:
+ assert(Op.getType()->isPointerTy());
+ return true;
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
- case Instruction::Select:
return true;
+ case Instruction::Select:
+ return Op.getType()->isPointerTy();
default:
return false;
}
@@ -548,10 +553,17 @@ static Value *cloneConstantExprWithNewAddressSpace(
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
IsNew = true;
NewOperands.push_back(cast<Constant>(NewOperand));
- } else {
- // Otherwise, reuses the old operand.
- NewOperands.push_back(Operand);
+ continue;
}
+ if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
+ if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
+ CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+ IsNew = true;
+ NewOperands.push_back(cast<Constant>(NewOperand));
+ continue;
+ }
+ // Otherwise, reuses the old operand.
+ NewOperands.push_back(Operand);
}
// If !IsNew, we will replace the Value with itself. However, replaced values
@@ -621,9 +633,12 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- FlatAddrSpace = TTI.getFlatAddressSpace();
- if (FlatAddrSpace == UninitializedAddressSpace)
- return false;
+
+ if (FlatAddrSpace == UninitializedAddressSpace) {
+ FlatAddrSpace = TTI.getFlatAddressSpace();
+ if (FlatAddrSpace == UninitializedAddressSpace)
+ return false;
+ }
// Collects all flat address expressions in postorder.
std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
@@ -991,8 +1006,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- BasicBlock::iterator InsertPos = std::next(I->getIterator());
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Don't create a copy of the original addrspacecast.
+ if (U == V && isa<AddrSpaceCastInst>(V))
+ continue;
+
+ BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
@@ -1015,6 +1034,6 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
return true;
}
-FunctionPass *llvm::createInferAddressSpacesPass() {
- return new InferAddressSpaces();
+FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
+ return new InferAddressSpaces(AddressSpace);
}
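Editorial note: with the pinned-address-space constructor added above, a target or embedder that already knows its flat address space can create the pass with it directly instead of relying on the TTI query in runOnFunction. A hedged usage sketch (address space 0 is an assumed placeholder, not a value taken from this patch):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

void addInferAddressSpaces(llvm::legacy::PassManagerBase &PM) {
  // Pass the known flat address space explicitly; the TTI lookup is then
  // skipped because FlatAddrSpace is no longer UninitializedAddressSpace.
  PM.add(llvm::createInferAddressSpacesPass(/*AddressSpace=*/0));
}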
diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp
index 05cd48d83267..6616364ab203 100644
--- a/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -1,9 +1,8 @@
//===- InstSimplifyPass.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 48de56a02834..b86bf2fefbe5 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1,9 +1,8 @@
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -38,7 +38,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -103,6 +102,12 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
+static cl::opt<bool> ThreadAcrossLoopHeaders(
+ "jump-threading-across-loop-headers",
+ cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
+ cl::init(false), cl::Hidden);
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
@@ -369,7 +374,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
- FindLoopHeaders(F);
+ if (!ThreadAcrossLoopHeaders)
+ FindLoopHeaders(F);
bool EverChanged = false;
bool Changed;
@@ -1056,7 +1062,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
Condition = IB->getAddress()->stripPointerCasts();
Preference = WantBlockAddress;
} else {
- return false; // Must be an invoke.
+ return false; // Must be an invoke or callbr.
}
// Run constant folding to see if we can reduce the condition to a simple
@@ -1092,7 +1098,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
@@ -1143,7 +1149,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
ToRemoveSucc->removePredecessor(BB, true);
- BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ BranchInst *UncondBr =
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ UncondBr->setDebugLoc(CondBr->getDebugLoc());
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
@@ -1160,7 +1168,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantInt::getFalse(CondCmp->getType());
ReplaceFoldableUses(CondCmp, CI);
}
- DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, BB, ToRemoveSucc}});
return true;
}
@@ -1172,7 +1181,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- TryToUnfoldSelect(SI, BB);
+ if (TryToUnfoldSelect(SI, BB))
+ return true;
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -1245,9 +1255,10 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
RemoveSucc->removePredecessor(BB);
- BranchInst::Create(KeepSucc, BI);
+ BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
+ UncondBI->setDebugLoc(BI->getDebugLoc());
BI->eraseFromParent();
- DTU->deleteEdgeRelaxed(BB, RemoveSucc);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
return true;
}
CurrentBB = CurrentPred;
@@ -1429,7 +1440,9 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
- if (isa<IndirectBrInst>(P->getTerminator()))
+ // Same for CallBr.
+ if (isa<IndirectBrInst>(P->getTerminator()) ||
+ isa<CallBrInst>(P->getTerminator()))
return false;
if (!AvailablePredSet.count(P))
@@ -1446,11 +1459,11 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- LoadInst *NewVal =
- new LoadInst(LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
- LoadI->getName() + ".pr", false, LoadI->getAlignment(),
- LoadI->getOrdering(), LoadI->getSyncScopeID(),
- UnavailablePred->getTerminator());
+ LoadInst *NewVal = new LoadInst(
+ LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+ LoadI->getName() + ".pr", false, LoadI->getAlignment(),
+ LoadI->getOrdering(), LoadI->getSyncScopeID(),
+ UnavailablePred->getTerminator());
NewVal->setDebugLoc(LoadI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
@@ -1474,8 +1487,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
- std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)nullptr));
+ llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1601,7 +1613,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
Constant *OnlyVal = nullptr;
Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
- unsigned PredWithKnownDest = 0;
for (const auto &PredValue : PredValues) {
BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
@@ -1638,12 +1649,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
OnlyVal = MultipleVal;
}
- // We know where this predecessor is going.
- ++PredWithKnownDest;
-
// If the predecessor ends with an indirect goto, we can't change its
- // destination.
- if (isa<IndirectBrInst>(Pred->getTerminator()))
+ // destination. Same for CallBr.
+ if (isa<IndirectBrInst>(Pred->getTerminator()) ||
+ isa<CallBrInst>(Pred->getTerminator()))
continue;
PredToDestList.push_back(std::make_pair(Pred, DestBB));
@@ -1657,7 +1666,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// not thread. By doing so, we do not need to duplicate the current block and
// also miss potential opportunities in case we don't/can't duplicate.
if (OnlyDest && OnlyDest != MultipleDestSentinel) {
- if (PredWithKnownDest == (size_t)pred_size(BB)) {
+ if (BB->hasNPredecessors(PredToDestList.size())) {
bool SeenFirstBranchToOnlyDest = false;
std::vector <DominatorTree::UpdateType> Updates;
Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
@@ -1674,7 +1683,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
Instruction *Term = BB->getTerminator();
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1976,8 +1985,14 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
BasicBlock::iterator BI = BB->begin();
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+ // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
+ // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
+ // the operand of the cloned phi.
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+ ValueMapping[PN] = NewPN;
+ }
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
@@ -2016,9 +2031,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
// Enqueue required DT updates.
- DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
- {DominatorTree::Insert, PredBB, NewBB},
- {DominatorTree::Delete, PredBB, BB}});
+ DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Delete, PredBB, BB}});
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
@@ -2112,7 +2127,7 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return NewBBs[0];
}
@@ -2385,7 +2400,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
++NumDupes;
return true;
@@ -2421,8 +2436,8 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
// The select is now dead.
SI->eraseFromParent();
- DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
- {DominatorTree::Insert, Pred, NewBB}});
+ DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
+ {DominatorTree::Insert, Pred, NewBB}});
// Update any other PHI nodes in BB.
for (BasicBlock::iterator BI = BB->begin();
@@ -2599,7 +2614,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
}
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
return false;
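Editorial note: many hunks in this file replace DTU->applyUpdates with DTU->applyUpdatesPermissive. As the name suggests, the permissive variant filters out duplicate and no-op updates before forwarding them, whereas the strict variant expects an already clean, CFG-consistent sequence. A small illustrative sketch under that assumption (not code from the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void deleteEdgePermissively(DominatorTree &DT, BasicBlock *BB,
                                   BasicBlock *Succ) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  SmallVector<DominatorTree::UpdateType, 4> Updates;
  Updates.push_back({DominatorTree::Delete, BB, Succ});
  Updates.push_back({DominatorTree::Delete, BB, Succ}); // duplicate edge
  // The duplicate above is dropped here rather than tripping an assertion.
  DTU.applyUpdatesPermissive(Updates);
}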
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d204654c3915..d9dda4cef2d2 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -1,9 +1,8 @@
//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -107,17 +107,29 @@ static cl::opt<int>
LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
cl::desc("How many instruction to cross product using AA"));
-// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
-// pathological cases, in exchange for faster compile. This is to be removed
-// if MemorySSA starts to address the same issue. This flag applies only when
-// LICM uses MemorySSA instead on AliasSetTracker. When the flag is disabled
-// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
-// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's
-// getDefiningAccess, which may not be precise, since optimizeUses is capped.
-static cl::opt<bool> EnableLicmCap(
- "enable-licm-cap", cl::init(false), cl::Hidden,
- cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
- "pathological cases, in exchange for faster compile"));
+// Experimental option to allow imprecision in LICM in pathological cases, in
+// exchange for faster compile. This is to be removed if MemorySSA starts to
+// address the same issue. This flag applies only when LICM uses MemorySSA
+// instead of AliasSetTracker. LICM calls MemorySSAWalker's
+// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
+// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
+// which may not be precise, since optimizeUses is capped. The result is
+// correct, but we may not walk as far up as possible to find which access
+// actually clobbers the one queried.
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
+ "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
+ cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
+ "for faster compile. Caps the MemorySSA clobbering calls."));
+
+// Experimentally, memory promotion carries less importance than sinking and
+// hoisting. Limit when we do promotion when using MemorySSA, in order to save
+// compile time.
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
+ "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
+ cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
+ "effect. When MSSA in LICM is enabled, then this is the maximum "
+ "number of accesses allowed to be present in a loop in order to "
+ "enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
@@ -128,8 +140,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
- bool FreeInLoop);
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -140,7 +151,8 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AliasAnalysis *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop);
+ Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags);
static Instruction *CloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -149,7 +161,8 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
- ICFLoopSafetyInfo &SafetyInfo);
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater *MSSAU);
namespace {
struct LoopInvariantCodeMotion {
@@ -160,17 +173,29 @@ struct LoopInvariantCodeMotion {
OptimizationRemarkEmitter *ORE, bool DeleteAST);
ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
+ LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap)
+ : LicmMssaOptCap(LicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
private:
ASTrackerMapTy LoopToAliasSetMap;
+ unsigned LicmMssaOptCap;
+ unsigned LicmMssaNoAccForPromotionCap;
std::unique_ptr<AliasSetTracker>
collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
+ std::unique_ptr<AliasSetTracker>
+ collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
+ MemorySSAUpdater *MSSAU);
};
struct LegacyLICMPass : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LegacyLICMPass() : LoopPass(ID) {
+ LegacyLICMPass(
+ unsigned LicmMssaOptCap = SetLicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
@@ -219,8 +244,16 @@ struct LegacyLICMPass : public LoopPass {
using llvm::Pass::doFinalization;
bool doFinalization() override {
- assert(LICM.getLoopToAliasSetMap().empty() &&
+ auto &AliasSetMap = LICM.getLoopToAliasSetMap();
+ // All loops in the AliasSetMap should be cleaned up already. The only case
+ // where we fail to do so is if an outer loop gets deleted before LICM
+ // visits it.
+ assert(all_of(AliasSetMap,
+ [](LoopInvariantCodeMotion::ASTrackerMapTy::value_type &KV) {
+ return !KV.first->getParentLoop();
+ }) &&
"Didn't free loop alias sets");
+ AliasSetMap.clear();
return false;
}
@@ -252,7 +285,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");
- LoopInvariantCodeMotion LICM;
+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
AR.MSSA, ORE, true))
return PreservedAnalyses::all();
@@ -261,6 +294,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -276,6 +311,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
+Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap) {
+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+}
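Editorial note: a hedged usage sketch of the new createLICMPass overload just above, assuming the matching declaration in llvm/Transforms/Scalar.h; the budget values are arbitrary placeholders, not values from the patch:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

void addCappedLICM(llvm::legacy::PassManagerBase &PM) {
  // Override the licm-mssa-* command-line defaults for this pass instance.
  PM.add(llvm::createLICMPass(/*LicmMssaOptCap=*/50,
                              /*LicmMssaNoAccForPromotionCap=*/200));
}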
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
@@ -293,12 +332,31 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
+ bool NoOfMemAccTooLarge = false;
+ unsigned LicmMssaOptCounter = 0;
+
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
} else {
- LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+ LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+
+ unsigned AccessCapCount = 0;
+ for (auto *BB : L->getBlocks()) {
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses) {
+ (void)MA;
+ AccessCapCount++;
+ if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
+ NoOfMemAccTooLarge = true;
+ break;
+ }
+ }
+ }
+ if (NoOfMemAccTooLarge)
+ break;
+ }
}
// Get the preheader block to move instructions into...
@@ -317,13 +375,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
// that we are guaranteed to see definitions before we see uses. This allows
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
- //
+ SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true};
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+ Flags.IsSink = false;
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -332,7 +393,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
+ !NoOfMemAccTooLarge) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -344,38 +406,45 @@ bool LoopInvariantCodeMotion::runOnLoop(
if (!HasCatchSwitch) {
SmallVector<Instruction *, 8> InsertPts;
+ SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
- for (BasicBlock *ExitBlock : ExitBlocks)
+ if (MSSAU)
+ MSSAInsertPts.reserve(ExitBlocks.size());
+ for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ if (MSSAU)
+ MSSAInsertPts.push_back(nullptr);
+ }
PredIteratorCache PIC;
bool Promoted = false;
- if (CurAST.get()) {
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
- CurAST.get(), &SafetyInfo, ORE);
- }
+ // Build an AST using MSSA.
+ if (!CurAST.get())
+ CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
+
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSet &AS : *CurAST) {
+ // We can promote this alias set if it has a store, if it is a "Must"
+ // alias set, if the pointer is loop invariant, and if we are not
+ // eliminating any volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ !L->isLoopInvariant(AS.begin()->getValue()))
+ continue;
+
+ assert(
+ !AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+
+ SmallSetVector<Value *, 8> PointerMustAliases;
+ for (const auto &ASI : AS)
+ PointerMustAliases.insert(ASI.getValue());
+
+ Promoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+ DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
}
- // FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -399,7 +468,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
- if (CurAST.get() && L->getParentLoop() && !DeleteAST)
+ if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
LoopToAliasSetMap[L] = std::move(CurAST);
if (MSSAU.get() && VerifyMemorySSA)
@@ -420,6 +489,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
@@ -463,9 +533,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+ ORE) &&
!I.mayHaveSideEffects()) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
@@ -718,6 +789,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -770,7 +842,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+ ORE) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
@@ -808,13 +881,18 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
- using namespace PatternMatch;
- if (((I.use_empty() &&
- match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
- isGuard(&I)) &&
+ auto IsInvariantStart = [&](Instruction &I) {
+ using namespace PatternMatch;
+ return I.use_empty() &&
+ match(&I, m_Intrinsic<Intrinsic::invariant_start>());
+ };
+ auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
+ return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
+ };
+ if ((IsInvariantStart(I) || isGuard(&I)) &&
CurLoop->hasLoopInvariantOperands(&I) &&
- SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
- SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
+ MustExecuteWithoutWritesBefore(I)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, ORE);
HoistedInstructions.push_back(&I);
@@ -867,7 +945,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getName()
<< ": " << *I << "\n");
- moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
HoistPoint = I;
Changed = true;
}
@@ -897,8 +975,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getOperand(0);
const DataLayout &DL = LI->getModule()->getDataLayout();
- const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
- cast<PointerType>(Addr->getType())->getElementType());
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
// if the type is i8 addrspace(x)*, we know this is the type of
// llvm.invariant.start operand
@@ -945,16 +1022,15 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
-/// concerns such as aliasing or speculation safety.
+/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
- return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
- isa<CallInst>(I) || isa<FenceInst>(I) ||
- isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
- isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
- isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
+ return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+ isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I));
}
/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
@@ -997,12 +1073,15 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU,
bool TargetExecutesOncePerLoop,
+ SinkAndHoistLICMFlags *Flags,
OptimizationRemarkEmitter *ORE) {
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+ if (MSSA)
+ assert(Flags != nullptr && "Flags cannot be null.");
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
@@ -1029,7 +1108,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1074,7 +1153,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurAST, CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
+ *Flags);
if (Invalidated)
return false;
}
@@ -1133,13 +1213,46 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else { // MSSAU
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- if (!EnableLicmCap) {
- auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- if (MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock()))
- return true;
- }
- return false;
+ // If there are more accesses than the Promotion cap, give up, we're not
+ // walking a list that long.
+ if (Flags->NoOfMemAccTooLarge)
+ return false;
+ // Check store only if there's still "quota" to check clobber.
+ if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
+ return false;
+ // If there are interfering Uses (i.e. their defining access is in the
+ // loop), or ordered loads (stored as Defs!), don't move this store.
+ // Could do better here, but this is conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ auto *SIMD = MSSA->getMemoryAccess(SI);
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = MU->getDefiningAccess();
+ if (!MSSA->isLiveOnEntryDef(MD) &&
+ CurLoop->contains(MD->getBlock()))
+ return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias SI.
+ if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+ return false;
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+ (void)LI; // Silence warning.
+ assert(!LI->isUnordered() && "Expected unordered load");
+ return false;
+ }
+ }
+
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ Flags->LicmMssaOptCounter++;
+ // If there are no clobbering Defs in the loop, store is safe to hoist.
+ return MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock());
}
}
@@ -1233,7 +1346,7 @@ static Instruction *CloneInstructionInExitBlock(
// Sinking call-sites need to be handled differently from other
// instructions. The cloned call-site needs a funclet bundle operand
- // appropriate for it's location in the CFG.
+ // appropriate for its location in the CFG.
SmallVector<OperandBundleDef, 1> OpBundles;
for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
BundleIdx != BundleEnd; ++BundleIdx) {
@@ -1310,10 +1423,15 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
}
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
- ICFLoopSafetyInfo &SafetyInfo) {
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater *MSSAU) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
+ if (MSSAU)
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
}
static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1426,8 +1544,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
- bool FreeInLoop) {
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1441,7 +1558,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
++NumSunk;
// Iterate over users to be ready for actual sinking. Replace users via
- // unrechable blocks with undef and make all user PHIs trivially replcable.
+ // unreachable blocks with undef and make all user PHIs trivially replaceable.
SmallPtrSet<Instruction *, 8> VisitedUsers;
for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
auto *User = cast<Instruction>(*UI);
@@ -1549,25 +1666,15 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
- moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
else
// Move the new node to the destination block, before its terminator.
- moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
- if (MSSAU) {
- // If moving, I just moved a load or store, so update MemorySSA.
- MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&I));
- if (OldMemAcc)
- MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
- }
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
- // Do not retain debug locations when we are moving instructions to different
- // basic blocks, because we want to avoid jumpy line tables. Calls, however,
- // need to retain their debug locs because they may be inlined.
- // FIXME: How do we retain source locations without causing poor debugging
- // behavior?
- if (!isa<CallInst>(I))
- I.setDebugLoc(DebugLoc());
+ // Apply line 0 debug locations when we are moving instructions to different
+ // basic blocks because we want to avoid jumpy line tables.
+ if (const DebugLoc &DL = I.getDebugLoc())
+ I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
if (isa<LoadInst>(I))
++NumMovedLoads;
@@ -1611,8 +1718,10 @@ class LoopPromoter : public LoadAndStorePromoter {
const SmallSetVector<Value *, 8> &PointerMustAliases;
SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
SmallVectorImpl<Instruction *> &LoopInsertPts;
+ SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
AliasSetTracker &AST;
+ MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
int Alignment;
@@ -1639,15 +1748,16 @@ public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
const SmallSetVector<Value *, 8> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
- SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
- AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo)
+ SmallVectorImpl<Instruction *> &LIP,
+ SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
+ DebugLoc dl, int alignment, bool UnorderedAtomic,
+ const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
- LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(std::move(dl)), Alignment(alignment),
- UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
- {}
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
+ PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+ Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+ SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1659,7 +1769,7 @@ public:
return PointerMustAliases.count(Ptr);
}
- void doExtraRewritesBeforeFinalDeletion() const override {
+ void doExtraRewritesBeforeFinalDeletion() override {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -1677,6 +1787,21 @@ public:
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
+
+ if (MSSAU) {
+ MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+ MemoryAccess *NewMemAcc;
+ if (!MSSAInsertPoint) {
+ NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+ } else {
+ NewMemAcc =
+ MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+ }
+ MSSAInsertPts[i] = NewMemAcc;
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ // FIXME: true for safety, false may still be correct.
+ }
}
}
@@ -1687,6 +1812,8 @@ public:
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
AST.deleteValue(I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
}
};
@@ -1723,10 +1850,11 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
bool llvm::promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &PointerMustAliases,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
- SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+ SmallVectorImpl<Instruction *> &InsertPts,
+ SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
CurAST != nullptr && SafetyInfo != nullptr &&
@@ -1827,9 +1955,21 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
- if (!DereferenceableInPH)
- DereferenceableInPH = isSafeToExecuteUnconditionally(
- *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
+ unsigned InstAlignment = Load->getAlignment();
+ if (!InstAlignment)
+ InstAlignment =
+ MDL.getABITypeAlignment(Load->getType());
+
+ // Note that proving a load safe to speculate requires proving
+ // sufficient alignment at the target location. Proving it guaranteed
+ // to execute does as well. Thus we can increase our guaranteed
+ // alignment as well.
+ if (!DereferenceableInPH || (InstAlignment > Alignment))
+ if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
+ ORE, Preheader->getTerminator())) {
+ DereferenceableInPH = true;
+ Alignment = std::max(Alignment, InstAlignment);
+ }
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -1875,8 +2015,8 @@ bool llvm::promoteLoopAccessesToScalars(
// deref info through it.
if (!DereferenceableInPH) {
DereferenceableInPH = isDereferenceableAndAlignedPointer(
- Store->getPointerOperand(), Store->getAlignment(), MDL,
- Preheader->getTerminator(), DT);
+ Store->getPointerOperand(), Store->getValueOperand()->getType(),
+ Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
}
} else
return false; // Not a load or store.
@@ -1900,6 +2040,14 @@ bool llvm::promoteLoopAccessesToScalars(
if (SawUnorderedAtomic && SawNotAtomic)
return false;
+ // If we're inserting an atomic load in the preheader, we must be able to
+ // lower it. We're only guaranteed to be able to lower naturally aligned
+ // atomics.
+ auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
+ if (SawUnorderedAtomic &&
+ Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+ return false;
+
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
return false;
@@ -1943,13 +2091,14 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment,
- SawUnorderedAtomic, AATags, *SafetyInfo);
+ InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
- SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ SomePtr->getType()->getPointerElementType(), SomePtr,
+ SomePtr->getName() + ".promoted", Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
@@ -1958,13 +2107,23 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
+ MemoryAccess *PreheaderLoadMemoryAccess;
+ if (MSSAU) {
+ PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU->insertUse(NewMemUse);
+ }
+
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
return true;
}
@@ -2017,6 +2176,15 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
return CurAST;
}
+std::unique_ptr<AliasSetTracker>
+LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
+ Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
+ auto *MSSA = MSSAU->getMemorySSA();
+ auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
+ CurAST->addAllInstructionsInLoopUsingMSSA();
+ return CurAST;
+}
+
/// Simple analysis hook. Clone alias set info.
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
@@ -2095,15 +2263,49 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
}
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop) {
- MemoryAccess *Source;
- // See declaration of EnableLicmCap for usage details.
- if (EnableLicmCap)
- Source = MU->getDefiningAccess();
- else
- Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- return !MSSA->isLiveOnEntryDef(Source) &&
- CurLoop->contains(Source->getBlock());
+ Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags) {
+ // For hoisting, use the walker to determine safety
+ if (!Flags.IsSink) {
+ MemoryAccess *Source;
+ // See declaration of SetLicmMssaOptCap for usage details.
+ if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+ Source = MU->getDefiningAccess();
+ else {
+ Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+ Flags.LicmMssaOptCounter++;
+ }
+ return !MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock());
+ }
+
+ // For sinking, we'd need to check all Defs below this use. The getClobbering
+ // call will look on the backedge of the loop, but will check aliasing with
+ // the instructions on the previous iteration.
+ // For example:
+ // for (i ... )
+ //   load a[i]  : MemoryUse, optimized to liveOnEntry (LoE)
+ //   store a[i] : 1 = MemoryDef(2), with 2 = MemoryPhi for the loop.
+ // i++;
+ // The load sees no clobbering inside the loop, as the backedge alias check
+ // does phi translation, and will check aliasing against store a[i-1].
+ // However sinking the load outside the loop, below the store is incorrect.
+
+ // For now, only sink if there are no Defs in the loop, and the existing ones
+ // precede the use and are in the same block.
+ // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+ // needs PostDominatorTreeAnalysis.
+ // FIXME: More precise: no Defs that alias this Use.
+ if (Flags.NoOfMemAccTooLarge)
+ return true;
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockDefs(BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (MU->getBlock() != MD->getBlock() ||
+ !MSSA->locallyDominates(MD, MU))
+ return true;
+ return false;
}
/// Little predicate that returns true if the specified basic block is in
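Editorial note: the LICM hunks above thread a SinkAndHoistLICMFlags bundle through sinkRegion, hoistRegion, canSinkOrHoistInst and pointerInvalidatedByLoopWithMSSA, but its declaration (in LoopUtils.h upstream) is not part of this diff. A sketch of the shape implied by the aggregate initialization and field accesses in this patch; treat it as inferred, not authoritative:

struct SinkAndHoistLICMFlags {
  bool NoOfMemAccTooLarge;     // loop exceeded licm-mssa-max-acc-promotion
  unsigned LicmMssaOptCounter; // clobber-walk queries issued so far
  unsigned LicmMssaOptCap;     // licm-mssa-optimization-cap budget
  unsigned LicmMssaNoAccForPromotionCap;
  bool IsSink;                 // true while sinking, false while hoisting
};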
diff --git a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index a64c99117d64..1c3ff1a61b7e 100644
--- a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -1,9 +1,8 @@
//===- LoopAccessAnalysisPrinter.cpp - Loop Access Analysis Printer --------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 3b41b5d96c86..1fcf1315a177 100644
--- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -1,9 +1,8 @@
//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -313,7 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
IRBuilder<> Builder(MemI);
Module *M = BB->getParent()->getParent();
Type *I32 = Type::getInt32Ty(BB->getContext());
- Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Function *PrefetchFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::prefetch);
Builder.CreateCall(
PrefetchFunc,
{PrefPtrValue,
@@ -333,4 +333,3 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
return MadeChange;
}
-
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index d412025d7e94..8371367e24e7 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -1,9 +1,8 @@
//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index d797c9dc9e72..f45e5fd0f50b 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1,9 +1,8 @@
//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -767,8 +766,14 @@ public:
"cannot isolate unsafe dependencies");
}
- // Don't distribute the loop if we need too many SCEV run-time checks.
+ // Don't distribute the loop if we need too many SCEV run-time checks, or
+ // any if it's illegal.
const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+ if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
+ return fail("RuntimeCheckWithConvergent",
+ "may not insert runtime check with convergent operation");
+ }
+
if (Pred.getComplexity() > (IsForced.getValueOr(false)
? PragmaDistributeSCEVCheckThreshold
: DistributeSCEVCheckThreshold))
@@ -796,7 +801,14 @@ public:
auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
RtPtrChecking);
+ if (LAI->hasConvergentOp() && !Checks.empty()) {
+ return fail("RuntimeCheckWithConvergent",
+ "may not insert runtime check with convergent operation");
+ }
+
if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+ assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
+
MDNode *OrigLoopID = L->getLoopID();
LLVM_DEBUG(dbgs() << "\nPointers:\n");
diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp
new file mode 100644
index 000000000000..0bc2bcff2ae1
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopFuse.cpp
@@ -0,0 +1,1215 @@
+//===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the loop fusion pass.
+/// The implementation is largely based on the following document:
+///
+/// Code Transformations to Augment the Scope of Loop Fusion in a
+/// Production Compiler
+/// Christopher Mark Barton
+/// MSc Thesis
+/// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
+///
+/// The general approach taken is to collect sets of control flow equivalent
+/// loops and test whether they can be fused. The necessary conditions for
+/// fusion are:
+/// 1. The loops must be adjacent (there cannot be any statements between
+/// the two loops).
+/// 2. The loops must be conforming (they must execute the same number of
+/// iterations).
+/// 3. The loops must be control flow equivalent (if one loop executes, the
+/// other is guaranteed to execute).
+/// 4. There cannot be any negative distance dependencies between the loops.
+/// If all of these conditions are satisfied, it is safe to fuse the loops.
+///
+/// This implementation creates FusionCandidates that represent the loop and the
+/// necessary information needed by fusion. It then operates on the fusion
+/// candidates, first confirming that the candidate is eligible for fusion. The
+/// candidates are then collected into control flow equivalent sets, sorted in
+/// dominance order. Each set of control flow equivalent candidates is then
+/// traversed, attempting to fuse pairs of candidates in the set. If all
+/// requirements for fusion are met, the two candidates are fused, creating a
+/// new (fused) candidate which is then added back into the set to consider for
+/// additional fusion.
+///
+/// This implementation currently does not make any modifications to remove
+/// conditions for fusion. Code transformations to make loops conform to each of
+/// the conditions for fusion are discussed in more detail in the document
+/// above. These can be added to the current implementation in the future.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopFuse.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-fusion"
+
+STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
+STATISTIC(InvalidPreheader, "Loop has invalid preheader");
+STATISTIC(InvalidHeader, "Loop has invalid header");
+STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
+STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
+STATISTIC(InvalidLatch, "Loop has invalid latch");
+STATISTIC(InvalidLoop, "Loop is invalid");
+STATISTIC(AddressTakenBB, "Basic block has address taken");
+STATISTIC(MayThrowException, "Loop may throw an exception");
+STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
+STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
+STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
+STATISTIC(InvalidTripCount,
+ "Loop does not have invariant backedge taken count");
+STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
+STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
+STATISTIC(NonAdjacent, "Candidates are not adjacent");
+STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+
+enum FusionDependenceAnalysisChoice {
+ FUSION_DEPENDENCE_ANALYSIS_SCEV,
+ FUSION_DEPENDENCE_ANALYSIS_DA,
+ FUSION_DEPENDENCE_ANALYSIS_ALL,
+};
+
+static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
+ "loop-fusion-dependence-analysis",
+ cl::desc("Which dependence analysis should loop fusion use?"),
+ cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
+ "Use the scalar evolution interface"),
+ clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
+ "Use the dependence analysis interface"),
+ clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
+ "Use all available analyses")),
+ cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ VerboseFusionDebugging("loop-fusion-verbose-debug",
+ cl::desc("Enable verbose debugging for Loop Fusion"),
+ cl::Hidden, cl::init(false), cl::ZeroOrMore);
+#endif
+
+/// This class is used to represent a candidate for loop fusion. When it is
+/// constructed, it checks the conditions for loop fusion to ensure that it
+/// represents a valid candidate. It caches several parts of a loop that are
+/// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
+/// of continually querying the underlying Loop to retrieve these values. It is
+/// assumed these will not change throughout loop fusion.
+///
+/// The invalidate method should be used to indicate that the FusionCandidate is
+/// no longer a valid candidate for fusion. Similarly, the isValid() method can
+/// be used to ensure that the FusionCandidate is still valid for fusion.
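+///
+/// A typical use (an illustrative sketch) is to construct a candidate from a
+/// loop and query its validity before considering it for fusion:
+///
+///   FusionCandidate FC(L, &DT, &PDT);
+///   if (FC.isValid()) {
+///     // consider FC for fusion
+///   }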
+struct FusionCandidate {
+ /// Cache of parts of the loop used throughout loop fusion. These should not
+ /// need to change throughout the analysis and transformation.
+ /// These parts are cached to avoid repeatedly looking up in the Loop class.
+
+ /// Preheader of the loop this candidate represents
+ BasicBlock *Preheader;
+ /// Header of the loop this candidate represents
+ BasicBlock *Header;
+  /// The block in the loop that exits the loop
+ BasicBlock *ExitingBlock;
+  /// The successor block of this loop (where the exiting block branches to)
+ BasicBlock *ExitBlock;
+ /// Latch of the loop
+ BasicBlock *Latch;
+ /// The loop that this fusion candidate represents
+ Loop *L;
+ /// Vector of instructions in this loop that read from memory
+ SmallVector<Instruction *, 16> MemReads;
+ /// Vector of instructions in this loop that write to memory
+ SmallVector<Instruction *, 16> MemWrites;
+ /// Are all of the members of this fusion candidate still valid
+ bool Valid;
+
+ /// Dominator and PostDominator trees are needed for the
+ /// FusionCandidateCompare function, required by FusionCandidateSet to
+ /// determine where the FusionCandidate should be inserted into the set. These
+ /// are used to establish ordering of the FusionCandidates based on dominance.
+ const DominatorTree *DT;
+ const PostDominatorTree *PDT;
+
+ FusionCandidate(Loop *L, const DominatorTree *DT,
+ const PostDominatorTree *PDT)
+ : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
+ ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
+ Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+
+ // Walk over all blocks in the loop and check for conditions that may
+ // prevent fusion. For each block, walk over all instructions and collect
+    // the memory reads and writes. If any instructions that prevent fusion are
+ // found, invalidate this object and return.
+ for (BasicBlock *BB : L->blocks()) {
+ if (BB->hasAddressTaken()) {
+ AddressTakenBB++;
+ invalidate();
+ return;
+ }
+
+ for (Instruction &I : *BB) {
+ if (I.mayThrow()) {
+ MayThrowException++;
+ invalidate();
+ return;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isVolatile()) {
+ ContainsVolatileAccess++;
+ invalidate();
+ return;
+ }
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile()) {
+ ContainsVolatileAccess++;
+ invalidate();
+ return;
+ }
+ }
+ if (I.mayWriteToMemory())
+ MemWrites.push_back(&I);
+ if (I.mayReadFromMemory())
+ MemReads.push_back(&I);
+ }
+ }
+ }
+
+ /// Check if all members of the class are valid.
+ bool isValid() const {
+ return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
+ !L->isInvalid() && Valid;
+ }
+
+ /// Verify that all members are in sync with the Loop object.
+ void verify() const {
+ assert(isValid() && "Candidate is not valid!!");
+ assert(!L->isInvalid() && "Loop is invalid!");
+ assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
+ assert(Header == L->getHeader() && "Header is out of sync");
+ assert(ExitingBlock == L->getExitingBlock() &&
+ "Exiting Blocks is out of sync");
+ assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
+ assert(Latch == L->getLoopLatch() && "Latch is out of sync");
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
+ << "\n"
+ << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
+ << "\tExitingBB: "
+ << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
+ << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
+ << "\n"
+ << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
+ }
+#endif
+
+private:
+ // This is only used internally for now, to clear the MemWrites and MemReads
+  // lists and set Valid to false. I can't envision other uses of this right
+ // now, since once FusionCandidates are put into the FusionCandidateSet they
+ // are immutable. Thus, any time we need to change/update a FusionCandidate,
+ // we must create a new one and insert it into the FusionCandidateSet to
+ // ensure the FusionCandidateSet remains ordered correctly.
+ void invalidate() {
+ MemWrites.clear();
+ MemReads.clear();
+ Valid = false;
+ }
+};
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const FusionCandidate &FC) {
+ if (FC.isValid())
+ OS << FC.Preheader->getName();
+ else
+ OS << "<Invalid>";
+
+ return OS;
+}
+
+struct FusionCandidateCompare {
+ /// Comparison functor to sort two Control Flow Equivalent fusion candidates
+ /// into dominance order.
+ /// If LHS dominates RHS and RHS post-dominates LHS, return true;
+  /// If RHS dominates LHS and LHS post-dominates RHS, return false.
+ bool operator()(const FusionCandidate &LHS,
+ const FusionCandidate &RHS) const {
+ const DominatorTree *DT = LHS.DT;
+
+    // Do not save PDT to a local variable as it is only used in asserts and
+    // will trigger an unused variable warning if building without asserts.
+ assert(DT && LHS.PDT && "Expecting valid dominator tree");
+
+    // Do this compare first so that if LHS == RHS the function returns false.
+ if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
+ // RHS dominates LHS
+ // Verify LHS post-dominates RHS
+ assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader));
+ return false;
+ }
+
+ if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
+ // Verify RHS Postdominates LHS
+ assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader));
+ return true;
+ }
+
+ // If LHS does not dominate RHS and RHS does not dominate LHS then there is
+ // no dominance relationship between the two FusionCandidates. Thus, they
+ // should not be in the same set together.
+ llvm_unreachable(
+ "No dominance relationship between these fusion candidates!");
+ }
+};
+
+namespace {
+using LoopVector = SmallVector<Loop *, 4>;
+
+// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
+// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
+// dominates FC1 and FC1 post-dominates FC0.
+// std::set was chosen because we want a sorted data structure with stable
+// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
+// loops by moving intervening code around. When this intervening code contains
+// loops, those loops will be moved also. The corresponding FusionCandidates
+// will also need to be moved accordingly. As this is done, having stable
+// iterators will simplify the logic. Similarly, having an efficient insert that
+// keeps the FusionCandidateSet sorted will also simplify the implementation.
+using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
+using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+} // namespace
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const FusionCandidateSet &CandSet) {
+ for (auto IT : CandSet)
+ OS << IT << "\n";
+
+ return OS;
+}
+
+#if !defined(NDEBUG)
+static void
+printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
+ dbgs() << "Fusion Candidates: \n";
+ for (const auto &CandidateSet : FusionCandidates) {
+ dbgs() << "*** Fusion Candidate Set ***\n";
+ dbgs() << CandidateSet;
+ dbgs() << "****************************\n";
+ }
+}
+#endif
+
+/// Collect all loops in a function at the same nest level, starting at the
+/// outermost level.
+///
+/// This data structure collects all loops at the same nest level for a
+/// given function (specified by the LoopInfo object). It starts at the
+/// outermost level.
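+///
+/// For example (an illustrative sketch), given the nest
+///
+///   for (i) {        // depth 1
+///     for (j) {}     // depth 2
+///   }
+///   for (k) {}       // depth 1
+///
+/// the first level holds the i- and k-loops; after descend(), the only loop
+/// on the level is the j-loop.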
+struct LoopDepthTree {
+ using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
+ using iterator = LoopsOnLevelTy::iterator;
+ using const_iterator = LoopsOnLevelTy::const_iterator;
+
+ LoopDepthTree(LoopInfo &LI) : Depth(1) {
+ if (!LI.empty())
+ LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
+ }
+
+ /// Test whether a given loop has been removed from the function, and thus is
+ /// no longer valid.
+ bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
+
+ /// Record that a given loop has been removed from the function and is no
+ /// longer valid.
+ void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
+
+ /// Descend the tree to the next (inner) nesting level
+ void descend() {
+ LoopsOnLevelTy LoopsOnNextLevel;
+
+ for (const LoopVector &LV : *this)
+ for (Loop *L : LV)
+ if (!isRemovedLoop(L) && L->begin() != L->end())
+ LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
+
+ LoopsOnLevel = LoopsOnNextLevel;
+ RemovedLoops.clear();
+ Depth++;
+ }
+
+ bool empty() const { return size() == 0; }
+ size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
+ unsigned getDepth() const { return Depth; }
+
+ iterator begin() { return LoopsOnLevel.begin(); }
+ iterator end() { return LoopsOnLevel.end(); }
+ const_iterator begin() const { return LoopsOnLevel.begin(); }
+ const_iterator end() const { return LoopsOnLevel.end(); }
+
+private:
+ /// Set of loops that have been removed from the function and are no longer
+ /// valid.
+ SmallPtrSet<const Loop *, 8> RemovedLoops;
+
+ /// Depth of the current level, starting at 1 (outermost loops).
+ unsigned Depth;
+
+ /// Vector of loops at the current depth level that have the same parent loop
+ LoopsOnLevelTy LoopsOnLevel;
+};
+
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+ dbgs() << "****************************\n";
+ for (auto L : LV)
+ printLoop(*L, dbgs());
+ dbgs() << "****************************\n";
+}
+#endif
+
+static void reportLoopFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1,
+ OptimizationRemarkEmitter &ORE) {
+ using namespace ore;
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
+ << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+ << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
+}
+
+struct LoopFuser {
+private:
+ // Sets of control flow equivalent fusion candidates for a given nest level.
+ FusionCandidateCollection FusionCandidates;
+
+ LoopDepthTree LDT;
+ DomTreeUpdater DTU;
+
+ LoopInfo &LI;
+ DominatorTree &DT;
+ DependenceInfo &DI;
+ ScalarEvolution &SE;
+ PostDominatorTree &PDT;
+ OptimizationRemarkEmitter &ORE;
+
+public:
+ LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
+ ScalarEvolution &SE, PostDominatorTree &PDT,
+ OptimizationRemarkEmitter &ORE, const DataLayout &DL)
+ : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
+ DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
+
+ /// This is the main entry point for loop fusion. It will traverse the
+ /// specified function and collect candidate loops to fuse, starting at the
+ /// outermost nesting level and working inwards.
+ bool fuseLoops(Function &F) {
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LI.print(dbgs());
+ }
+#endif
+
+ LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
+ << "\n");
+ bool Changed = false;
+
+ while (!LDT.empty()) {
+ LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
+ << LDT.getDepth() << "\n";);
+
+ for (const LoopVector &LV : LDT) {
+        assert(LV.size() > 0 && "Empty loop set was built!");
+
+ // Skip singleton loop sets as they do not offer fusion opportunities on
+ // this level.
+ if (LV.size() == 1)
+ continue;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG({
+ dbgs() << " Visit loop set (#" << LV.size() << "):\n";
+ printLoopVector(LV);
+ });
+ }
+#endif
+
+ collectFusionCandidates(LV);
+ Changed |= fuseCandidates();
+ }
+
+ // Finished analyzing candidates at this level.
+ // Descend to the next level and clear all of the candidates currently
+ // collected. Note that it will not be possible to fuse any of the
+ // existing candidates with new candidates because the new candidates will
+ // be at a different nest level and thus not be control flow equivalent
+      // with any of the candidates collected so far.
+ LLVM_DEBUG(dbgs() << "Descend one level!\n");
+ LDT.descend();
+ FusionCandidates.clear();
+ }
+
+ if (Changed)
+ LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
+
+#ifndef NDEBUG
+ assert(DT.verify());
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
+ return Changed;
+ }
+
+private:
+ /// Determine if two fusion candidates are control flow equivalent.
+ ///
+  /// Two fusion candidates are control flow equivalent if, when one executes,
+ /// the other is guaranteed to execute. This is determined using dominators
+ /// and post-dominators: if A dominates B and B post-dominates A then A and B
+ /// are control-flow equivalent.
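+  ///
+  /// For example (an illustrative sketch), in
+  ///
+  ///   for (...) {}          // L0
+  ///   for (...) {}          // L1
+  ///   if (c) for (...) {}   // L2
+  ///
+  /// L0 and L1 are control flow equivalent, but neither is control flow
+  /// equivalent with L2, which only executes when the guard c holds.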
+ bool isControlFlowEquivalent(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
+
+ if (DT.dominates(FC0.Preheader, FC1.Preheader))
+ return PDT.dominates(FC1.Preheader, FC0.Preheader);
+
+ if (DT.dominates(FC1.Preheader, FC0.Preheader))
+ return PDT.dominates(FC0.Preheader, FC1.Preheader);
+
+ return false;
+ }
+
+ /// Determine if a fusion candidate (representing a loop) is eligible for
+ /// fusion. Note that this only checks whether a single loop can be fused - it
+ /// does not check whether it is *legal* to fuse two loops together.
+ bool eligibleForFusion(const FusionCandidate &FC) const {
+ if (!FC.isValid()) {
+ LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
+ if (!FC.Preheader)
+ InvalidPreheader++;
+ if (!FC.Header)
+ InvalidHeader++;
+ if (!FC.ExitingBlock)
+ InvalidExitingBlock++;
+ if (!FC.ExitBlock)
+ InvalidExitBlock++;
+ if (!FC.Latch)
+ InvalidLatch++;
+ if (FC.L->isInvalid())
+ InvalidLoop++;
+
+ return false;
+ }
+
+ // Require ScalarEvolution to be able to determine a trip count.
+ if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
+ LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+ << " trip count not computable!\n");
+ InvalidTripCount++;
+ return false;
+ }
+
+ if (!FC.L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+ << " is not in simplified form!\n");
+ NotSimplifiedForm++;
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Iterate over all loops in the given loop set and identify the loops that
+ /// are eligible for fusion. Place all eligible fusion candidates into Control
+ /// Flow Equivalent sets, sorted by dominance.
+ void collectFusionCandidates(const LoopVector &LV) {
+ for (Loop *L : LV) {
+ FusionCandidate CurrCand(L, &DT, &PDT);
+ if (!eligibleForFusion(CurrCand))
+ continue;
+
+ // Go through each list in FusionCandidates and determine if L is control
+      // flow equivalent with the first loop in that list. If it is, add the
+      // candidate for L to that list. If not, go to the next list.
+ // If no suitable list is found, start another list and add it to
+ // FusionCandidates.
+ bool FoundSet = false;
+
+ for (auto &CurrCandSet : FusionCandidates) {
+ if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
+ CurrCandSet.insert(CurrCand);
+ FoundSet = true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+ << " to existing candidate set\n");
+#endif
+ break;
+ }
+ }
+ if (!FoundSet) {
+ // No set was found. Create a new set and add to FusionCandidates
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+#endif
+ FusionCandidateSet NewCandSet;
+ NewCandSet.insert(CurrCand);
+ FusionCandidates.push_back(NewCandSet);
+ }
+ NumFusionCandidates++;
+ }
+ }
+
+ /// Determine if it is beneficial to fuse two loops.
+ ///
+ /// For now, this method simply returns true because we want to fuse as much
+ /// as possible (primarily to test the pass). This method will evolve, over
+ /// time, to add heuristics for profitability of fusion.
+ bool isBeneficialFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ return true;
+ }
+
+ /// Determine if two fusion candidates have the same trip count (i.e., they
+ /// execute the same number of iterations).
+ ///
+ /// Note that for now this method simply returns a boolean value because there
+ /// are no mechanisms in loop fusion to handle different trip counts. In the
+ /// future, this behaviour can be extended to adjust one of the loops to make
+ /// the trip counts equal (e.g., loop peeling). When this is added, this
+ /// interface may need to change to return more information than just a
+ /// boolean value.
+ bool identicalTripCounts(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
+ if (isa<SCEVCouldNotCompute>(TripCount0)) {
+ UncomputableTripCount++;
+ LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
+ return false;
+ }
+
+ const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
+ if (isa<SCEVCouldNotCompute>(TripCount1)) {
+ UncomputableTripCount++;
+ LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
+ << *TripCount1 << " are "
+ << (TripCount0 == TripCount1 ? "identical" : "different")
+ << "\n");
+
+ return (TripCount0 == TripCount1);
+ }
+
+ /// Walk each set of control flow equivalent fusion candidates and attempt to
+ /// fuse them. This does a single linear traversal of all candidates in the
+ /// set. The conditions for legal fusion are checked at this point. If a pair
+ /// of fusion candidates passes all legality checks, they are fused together
+ /// and a new fusion candidate is created and added to the FusionCandidateSet.
+ /// The original fusion candidates are then removed, as they are no longer
+ /// valid.
+ bool fuseCandidates() {
+ bool Fused = false;
+ LLVM_DEBUG(printFusionCandidates(FusionCandidates));
+ for (auto &CandidateSet : FusionCandidates) {
+ if (CandidateSet.size() < 2)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
+ << CandidateSet << "\n");
+
+ for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
+ assert(!LDT.isRemovedLoop(FC0->L) &&
+ "Should not have removed loops in CandidateSet!");
+ auto FC1 = FC0;
+ for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
+ assert(!LDT.isRemovedLoop(FC1->L) &&
+ "Should not have removed loops in CandidateSet!");
+
+ LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
+ dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
+
+ FC0->verify();
+ FC1->verify();
+
+ if (!identicalTripCounts(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+ "counts. Not fusing.\n");
+ NonEqualTripCount++;
+ continue;
+ }
+
+ if (!isAdjacent(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs()
+ << "Fusion candidates are not adjacent. Not fusing.\n");
+ NonAdjacent++;
+ continue;
+ }
+
+ // For now we skip fusing if the second candidate has any instructions
+ // in the preheader. This is done because we currently do not have the
+          // safety checks to determine if it is safe to move the preheader of
+ // the second candidate past the body of the first candidate. Once
+ // these checks are added, this condition can be removed.
+ if (!isEmptyPreheader(*FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+ "preheader. Not fusing.\n");
+ NonEmptyPreheader++;
+ continue;
+ }
+
+ if (!dependencesAllowFusion(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+ continue;
+ }
+
+ bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
+ LLVM_DEBUG(dbgs()
+ << "\tFusion appears to be "
+ << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+ if (!BeneficialToFuse)
+ continue;
+
+ // All analysis has completed and has determined that fusion is legal
+ // and profitable. At this point, start transforming the code and
+ // perform fusion.
+
+ LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
+ << *FC1 << "\n");
+
+ // Report fusion to the Optimization Remarks.
+ // Note this needs to be done *before* performFusion because
+ // performFusion will change the original loops, making it not
+ // possible to identify them after fusion is complete.
+ reportLoopFusion(*FC0, *FC1, ORE);
+
+ FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+ FusedCand.verify();
+ assert(eligibleForFusion(FusedCand) &&
+ "Fused candidate should be eligible for fusion!");
+
+ // Notify the loop-depth-tree that these loops are not valid objects
+ // anymore.
+ LDT.removeLoop(FC1->L);
+
+ CandidateSet.erase(FC0);
+ CandidateSet.erase(FC1);
+
+ auto InsertPos = CandidateSet.insert(FusedCand);
+
+ assert(InsertPos.second &&
+ "Unable to insert TargetCandidate in CandidateSet!");
+
+          // Reset FC0 and FC1 to the new (fused) candidate. Subsequent iterations
+ // of the FC1 loop will attempt to fuse the new (fused) loop with the
+ // remaining candidates in the current candidate set.
+ FC0 = FC1 = InsertPos.first;
+
+ LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
+ << "\n");
+
+ Fused = true;
+ }
+ }
+ }
+ return Fused;
+ }
+
+ /// Rewrite all additive recurrences in a SCEV to use a new loop.
+ class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
+ public:
+ AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
+ bool UseMax = true)
+ : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
+ NewL(NewL) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ const Loop *ExprL = Expr->getLoop();
+ SmallVector<const SCEV *, 2> Operands;
+ if (ExprL == &OldL) {
+ Operands.append(Expr->op_begin(), Expr->op_end());
+ return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
+ }
+
+ if (OldL.contains(ExprL)) {
+ bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
+ if (!UseMax || !Pos || !Expr->isAffine()) {
+ Valid = false;
+ return Expr;
+ }
+ return visit(Expr->getStart());
+ }
+
+ for (const SCEV *Op : Expr->operands())
+ Operands.push_back(visit(Op));
+ return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
+ }
+
+ bool wasValidSCEV() const { return Valid; }
+
+ private:
+ bool Valid, UseMax;
+ const Loop &OldL, &NewL;
+ };
+
+ /// Return false if the access functions of \p I0 and \p I1 could cause
+ /// a negative dependence.
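+  ///
+  /// For example (an illustrative sketch), if the first loop writes A[i] and
+  /// the second loop reads A[i], the access difference is 0 and fusion is
+  /// allowed; if the first loop writes A[i] and the second reads A[i+1], the
+  /// fused iteration i would read A[i+1] before it is written, so the negative
+  /// access difference must block fusion.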
+ bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
+ Instruction &I1, bool EqualIsInvalid) {
+ Value *Ptr0 = getLoadStorePointerOperand(&I0);
+ Value *Ptr1 = getLoadStorePointerOperand(&I1);
+ if (!Ptr0 || !Ptr1)
+ return false;
+
+ const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
+ const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
+ << *SCEVPtr1 << "\n");
+#endif
+ AddRecLoopReplacer Rewriter(SE, L0, L1);
+ SCEVPtr0 = Rewriter.visit(SCEVPtr0);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
+ << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
+#endif
+ if (!Rewriter.wasValidSCEV())
+ return false;
+
+    // TODO: isKnownPredicate doesn't work well when one SCEV is loop carried (by
+ // L0) and the other is not. We could check if it is monotone and test
+ // the beginning and end value instead.
+
+ BasicBlock *L0Header = L0.getHeader();
+ auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AddRec)
+ return false;
+ return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
+ !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
+ };
+ if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
+ return false;
+
+ ICmpInst::Predicate Pred =
+ EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
+ bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0
+ << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
+ << "\n");
+#endif
+ return IsAlwaysGE;
+ }
+
+ /// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
+ /// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
+ /// specified by @p DepChoice are used to determine this.
+ bool dependencesAllowFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1, Instruction &I0,
+ Instruction &I1, bool AnyDep,
+ FusionDependenceAnalysisChoice DepChoice) {
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
+ << DepChoice << "\n");
+ }
+#endif
+ switch (DepChoice) {
+ case FUSION_DEPENDENCE_ANALYSIS_SCEV:
+ return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
+ case FUSION_DEPENDENCE_ANALYSIS_DA: {
+ auto DepResult = DI.depends(&I0, &I1, true);
+ if (!DepResult)
+ return true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
+ dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
+ << (DepResult->isOrdered() ? "true" : "false")
+ << "]\n");
+ LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
+ << "\n");
+ }
+#endif
+
+ if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
+ LLVM_DEBUG(
+ dbgs() << "TODO: Implement pred/succ dependence handling!\n");
+
+ // TODO: Can we actually use the dependence info analysis here?
+ return false;
+ }
+
+ case FUSION_DEPENDENCE_ANALYSIS_ALL:
+ return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+ FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
+ dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+ FUSION_DEPENDENCE_ANALYSIS_DA);
+ }
+
+ llvm_unreachable("Unknown fusion dependence analysis choice!");
+ }
+
+ /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
+ bool dependencesAllowFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
+ << "\n");
+ assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
+ assert(DT.dominates(FC0.Preheader, FC1.Preheader));
+
+ for (Instruction *WriteL0 : FC0.MemWrites) {
+ for (Instruction *WriteL1 : FC1.MemWrites)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ for (Instruction *ReadL1 : FC1.MemReads)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ }
+
+ for (Instruction *WriteL1 : FC1.MemWrites) {
+ for (Instruction *WriteL0 : FC0.MemWrites)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ for (Instruction *ReadL0 : FC0.MemReads)
+ if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ }
+
+ // Walk through all uses in FC1. For each use, find the reaching def. If the
+    // def is located in FC0 then it is not safe to fuse.
+ for (BasicBlock *BB : FC1.L->blocks())
+ for (Instruction &I : *BB)
+ for (auto &Op : I.operands())
+ if (Instruction *Def = dyn_cast<Instruction>(Op))
+ if (FC0.L->contains(Def->getParent())) {
+ InvalidDependencies++;
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
+ /// case, there is no code in between the two fusion candidates, thus making
+ /// them adjacent.
+ bool isAdjacent(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ return FC0.ExitBlock == FC1.Preheader;
+ }
+
+ bool isEmptyPreheader(const FusionCandidate &FC) const {
+ return FC.Preheader->size() == 1;
+ }
+
+ /// Fuse two fusion candidates, creating a new fused loop.
+ ///
+ /// This method contains the mechanics of fusing two loops, represented by \p
+ /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
+ /// postdominates \p FC0 (making them control flow equivalent). It also
+ /// assumes that the other conditions for fusion have been met: adjacent,
+ /// identical trip counts, and no negative distance dependencies exist that
+ /// would prevent fusion. Thus, there is no checking for these conditions in
+ /// this method.
+ ///
+ /// Fusion is performed by rewiring the CFG to update successor blocks of the
+  /// components of the loops. Specifically, the following changes are done:
+ ///
+ /// 1. The preheader of \p FC1 is removed as it is no longer necessary
+ /// (because it is currently only a single statement block).
+ /// 2. The latch of \p FC0 is modified to jump to the header of \p FC1.
+  ///   3. The latch of \p FC1 is modified to jump to the header of \p FC0.
+ /// 4. All blocks from \p FC1 are removed from FC1 and added to FC0.
+ ///
+ /// All of these modifications are done with dominator tree updates, thus
+ /// keeping the dominator (and post dominator) information up-to-date.
+ ///
+ /// This can be improved in the future by actually merging blocks during
+ /// fusion. For example, the preheader of \p FC1 can be merged with the
+ /// preheader of \p FC0. This would allow loops with more than a single
+ /// statement in the preheader to be fused. Similarly, the latch blocks of the
+ /// two loops could also be fused into a single block. This will require
+ /// analysis to prove it is safe to move the contents of the block past
+ /// existing code, which currently has not been implemented.
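+  ///
+  /// As an illustrative sketch (assuming, for simplicity, that each latch is
+  /// also its loop's only exiting block), the edges
+  ///
+  ///   FC0.Latch -> {FC0.Header, FC1.Preheader}
+  ///   FC1.Latch -> {FC1.Header, FC1.ExitBlock}
+  ///
+  /// are rewired to
+  ///
+  ///   FC0.Latch -> FC1.Header
+  ///   FC1.Latch -> {FC0.Header, FC1.ExitBlock}
+  ///
+  /// and FC1.Preheader is removed, leaving a single loop whose header is
+  /// FC0.Header and whose latch is FC1.Latch.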
+ Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+ assert(FC0.isValid() && FC1.isValid() &&
+ "Expecting valid fusion candidates");
+
+ LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
+ dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
+
+ assert(FC1.Preheader == FC0.ExitBlock);
+ assert(FC1.Preheader->size() == 1 &&
+ FC1.Preheader->getSingleSuccessor() == FC1.Header);
+
+ // Remember the phi nodes originally in the header of FC0 in order to rewire
+ // them later. However, this is only necessary if the new loop carried
+    // values might not dominate the exiting branch. We do not generally test
+    // whether this is the case; instead, we simply insert intermediate phi
+    // nodes, but we need to make sure these intermediate phi nodes have
+    // different predecessors. To this end, we filter the special case where
+    // the exiting block is the latch block of the first loop. Nothing needs
+    // to be done anyway as all loop carried values dominate the latch and
+    // thereby also the exiting branch.
+ SmallVector<PHINode *, 8> OriginalFC0PHIs;
+ if (FC0.ExitingBlock != FC0.Latch)
+ for (PHINode &PHI : FC0.Header->phis())
+ OriginalFC0PHIs.push_back(&PHI);
+
+ // Replace incoming blocks for header PHIs first.
+ FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
+ FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
+
+ // Then modify the control flow and update DT and PDT.
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+
+ // The old exiting block of the first loop (FC0) has to jump to the header
+ // of the second as we need to execute the code in the second header block
+    // regardless of the trip count. That is, even if the trip count is 0 and
+    // the back edge is never taken, we still have to execute both loop headers,
+    // especially (but not only!) if the second is a do-while style loop.
+    // However, doing so might invalidate the phi nodes of the first loop as
+    // the new values only need to dominate their latch and not the exiting
+    // predicate. To remedy this potential problem we always introduce phi
+    // nodes in the header of the second loop later that select the loop
+    // carried value, if the second header was reached through an old latch of
+    // the first, or undef otherwise. This is sound as exiting the first
+    // implies the second will exit too, __without__ taking the back-edge
+    // (their trip-counts are equal, after all).
+    // KB: Would this sequence be simpler to just make FC0.ExitingBlock go
+    // to FC1.Header? I think this is basically what the three sequences are
+    // trying to accomplish; however, doing this directly in the CFG may mean
+    // the DT/PDT becomes invalid.
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+
+ // The pre-header of L1 is not necessary anymore.
+ assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ FC1.Preheader->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1.Preheader, FC1.Header));
+
+    // Move the phi nodes from the second loop's header block to the first's.
+ while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
+ if (SE.isSCEVable(PHI->getType()))
+ SE.forgetValue(PHI);
+ if (PHI->hasNUsesOrMore(1))
+ PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
+ else
+ PHI->eraseFromParent();
+ }
+
+ // Introduce new phi nodes in the second loop header to ensure
+ // exiting the first and jumping to the header of the second does not break
+ // the SSA property of the phis originally in the first loop. See also the
+ // comment above.
+ Instruction *L1HeaderIP = &FC1.Header->front();
+ for (PHINode *LCPHI : OriginalFC0PHIs) {
+ int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
+ assert(L1LatchBBIdx >= 0 &&
+ "Expected loop carried value to be rewired at this point!");
+
+ Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
+
+ PHINode *L1HeaderPHI = PHINode::Create(
+ LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ L1HeaderPHI->addIncoming(LCV, FC0.Latch);
+ L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+ FC0.ExitingBlock);
+
+ LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
+ }
+
+ // Replace latch terminator destinations.
+ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
+ FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+
+ // If FC0.Latch and FC0.ExitingBlock are the same then we have already
+ // performed the updates above.
+ if (FC0.Latch != FC0.ExitingBlock)
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.Latch, FC1.Header));
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC0.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
+ FC1.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC1.Latch, FC1.Header));
+
+ // Update DT/PDT
+ DTU.applyUpdates(TreeUpdates);
+
+ LI.removeBlock(FC1.Preheader);
+ DTU.deleteBB(FC1.Preheader);
+ DTU.flush();
+
+ // Is there a way to keep SE up-to-date so we don't need to forget the loops
+ // and rebuild the information in subsequent passes of fusion?
+ SE.forgetLoop(FC1.L);
+ SE.forgetLoop(FC0.L);
+
+ // Merge the loops.
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
+ FC1.L->block_end());
+ for (BasicBlock *BB : Blocks) {
+ FC0.L->addBlockEntry(BB);
+ FC1.L->removeBlockFromLoop(BB);
+ if (LI.getLoopFor(BB) != FC1.L)
+ continue;
+ LI.changeLoopFor(BB, FC0.L);
+ }
+ while (!FC1.L->empty()) {
+ const auto &ChildLoopIt = FC1.L->begin();
+ Loop *ChildLoop = *ChildLoopIt;
+ FC1.L->removeChildLoop(ChildLoopIt);
+ FC0.L->addChildLoop(ChildLoop);
+ }
+
+ // Delete the now empty loop L1.
+ LI.erase(FC1.L);
+
+#ifndef NDEBUG
+ assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
+
+ FuseCounter++;
+
+ LLVM_DEBUG(dbgs() << "Fusion done:\n");
+
+ return FC0.L;
+ }
+};
+
+struct LoopFuseLegacy : public FunctionPass {
+
+ static char ID;
+
+ LoopFuseLegacy() : FunctionPass(ID) {
+ initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<DependenceAnalysisWrapperPass>();
+
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+ return LF.fuseLoops(F);
+ }
+};
+
+PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &DI = AM.getResult<DependenceAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+ bool Changed = LF.fuseLoops(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+char LoopFuseLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
+
+FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fbffa1920a84..e561494f19cf 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,9 +1,8 @@
//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -51,12 +51,12 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -87,8 +87,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
@@ -120,6 +120,7 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
+ OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
public:
@@ -127,8 +128,9 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
+ const DataLayout *DL,
+ OptimizationRemarkEmitter &ORE)
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
@@ -221,7 +223,12 @@ public:
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+ // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
+
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
@@ -243,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+ // FIXME: This should probably be optional rather than required.
+ if (!ORE)
+ report_fatal_error(
+ "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
+ "at a higher level");
+
+ LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
+ *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -285,7 +304,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =
- L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
+ L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
@@ -313,9 +332,10 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ << "] Countable Loop %" << CurLoop->getHeader()->getName()
+ << "\n");
bool MadeChange = false;
@@ -430,7 +450,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
- Value *SplatValue = isBytewiseValue(StoredVal);
+ Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
// Note: memset and memset_pattern on unordered-atomic is yet not supported
@@ -607,7 +627,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Constant *FirstPatternValue = nullptr;
if (For == ForMemset::Yes)
- FirstSplatValue = isBytewiseValue(FirstStoredVal);
+ FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
@@ -640,7 +660,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Constant *SecondPatternValue = nullptr;
if (For == ForMemset::Yes)
- SecondSplatValue = isBytewiseValue(SecondStoredVal);
+ SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
@@ -860,7 +880,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
- Value *SplatValue = isBytewiseValue(StoredVal);
+ Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
if (!SplatValue)
@@ -931,9 +951,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
- Value *MSP =
- M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr);
+ FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
+ Int8PtrTy, Int8PtrTy, IntPtr);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -952,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
NewCall->setDebugLoc(TheStore->getDebugLoc());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader)
+ << "Transformed loop-strided store into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores)
@@ -1084,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
<< " from store ptr=" << *StoreEv << " at: " << *SI
<< "\n");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
+ NewCall->getDebugLoc(), Preheader)
+ << "Formed a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI);
@@ -1109,6 +1144,11 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Noncountable Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
+
return recognizePopcount() || recognizeAndInsertFFS();
}
@@ -1462,9 +1502,15 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
- if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+
+  // @llvm.dbg intrinsics don't count as they have no semantic effect.
+ auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+ uint32_t HeaderSize =
+ std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+
+ if (HeaderSize != IdiomCanonicalSize &&
TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
@@ -1529,7 +1575,7 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
@@ -1543,7 +1589,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
+ Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 6f7dc2429c09..31191b52895c 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -1,9 +1,8 @@
//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -234,6 +233,8 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 766e39b439a0..9a42365adc1b 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1,9 +1,8 @@
//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1265,9 +1264,7 @@ bool LoopInterchangeTransform::transform() {
}
void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
- BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
- BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
- InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+ SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
}
/// \brief Move all instructions except the terminator from FromBB right before
@@ -1280,17 +1277,6 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
- BasicBlock *NewPred) {
- for (PHINode &PHI : CurrBlock->phis()) {
- unsigned Num = PHI.getNumIncomingValues();
- for (unsigned i = 0; i < Num; ++i) {
- if (PHI.getIncomingBlock(i) == OldPred)
- PHI.setIncomingBlock(i, NewPred);
- }
- }
-}
-
/// Update BI to jump to NewBB instead of OldBB. Records updates to
/// the dominator tree in DTUpdates, if DT should be preserved.
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
@@ -1313,8 +1299,41 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
}
// Move Lcssa PHIs to the right place.
-static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
- BasicBlock *OuterLatch) {
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
+ BasicBlock *InnerLatch, BasicBlock *OuterHeader,
+ BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+
+  // Deal with LCSSA PHI nodes in the exit block of the inner loop that are
+  // defined either in the header or latch. Those blocks will become the
+  // header and latch of the new outer loop, and the only possible users can
+  // be PHI nodes in the exit block of the loop nest or the outer loop header
+  // (reduction PHIs; in that case, the incoming value must be defined in the
+  // inner loop header). We can just substitute the user with the incoming
+  // value and remove the PHI.
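+  // For instance (an illustrative sketch), an inner-exit PHI such as
+  //   %v.lcssa = phi i32 [ %v, %inner.latch ]
+  // whose only user is a PHI in the loop-nest exit block can be removed after
+  // replacing that use with %v directly.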
+ for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
+ assert(P.getNumIncomingValues() == 1 &&
+ "Only loops with a single exit are supported!");
+
+    // Incoming values are currently guaranteed to be instructions.
+ auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+ // Skip phis with incoming values from the inner loop body, excluding the
+ // header and latch.
+ if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+ continue;
+
+ assert(all_of(P.users(),
+ [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
+ return (cast<PHINode>(U)->getParent() == OuterHeader &&
+ IncI->getParent() == InnerHeader) ||
+ cast<PHINode>(U)->getParent() == OuterExit;
+ }) &&
+ "Can only replace phis iff the uses are in the loop nest exit or "
+ "the incoming value is defined in the inner header (it will "
+ "dominate all loop blocks after interchanging)");
+ P.replaceAllUsesWith(IncI);
+ P.eraseFromParent();
+ }
+
SmallVector<PHINode *, 8> LcssaInnerExit;
for (PHINode &P : InnerExit->phis())
LcssaInnerExit.push_back(&P);
@@ -1327,35 +1346,43 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
// If a PHI node has users outside of InnerExit, it has a use outside the
// interchanged loop and we have to preserve it. We move these to
// InnerLatch, which will become the new exit block for the innermost
- // loop after interchanging. For PHIs only used in InnerExit, we can just
- // replace them with the incoming value.
- for (PHINode *P : LcssaInnerExit) {
- bool hasUsersOutside = false;
- for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
- auto *Usr = cast<Instruction>(U.getUser());
- if (Usr->getParent() != InnerExit) {
- hasUsersOutside = true;
- continue;
- }
- U.set(P->getIncomingValueForBlock(InnerLatch));
- }
- if (hasUsersOutside)
- P->moveBefore(InnerLatch->getFirstNonPHI());
- else
- P->eraseFromParent();
- }
+ // loop after interchanging.
+ for (PHINode *P : LcssaInnerExit)
+ P->moveBefore(InnerLatch->getFirstNonPHI());
// If the inner loop latch contains LCSSA PHIs, those come from a child loop
// and we have to move them to the new inner latch.
for (PHINode *P : LcssaInnerLatch)
P->moveBefore(InnerExit->getFirstNonPHI());
+ // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
+ // incoming values from the outer latch or header, we have to add a new PHI
+ // in the inner loop latch, which became the exit block of the outer loop,
+ // after interchanging.
+ if (OuterExit) {
+ for (PHINode &P : OuterExit->phis()) {
+ if (P.getNumIncomingValues() != 1)
+ continue;
+      // Skip PHIs with incoming values not defined in the outer loop's header
+      // or latch. Also skip incoming PHIs defined in the latch. Those should
+ // already have been updated.
+ auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
+ if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
+ I->getParent() != OuterHeader))
+ continue;
+
+ PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
+ NewPhi->setIncomingValue(0, P.getIncomingValue(0));
+ NewPhi->setIncomingBlock(0, OuterLatch);
+ NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
+ P.setIncomingValue(0, NewPhi);
+ }
+ }
+
// Now adjust the incoming blocks for the LCSSA PHIs.
// For PHIs moved from Inner's exit block, we need to replace Inner's latch
// with the new latch.
- updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
+ InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
}
bool LoopInterchangeTransform::adjustLoopBranches() {
@@ -1374,9 +1401,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// preheaders do not satisfy those conditions.
if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
!OuterLoopPreHeader->getUniquePredecessor())
- OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
+ OuterLoopPreHeader =
+ InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
if (InnerLoopPreHeader == OuterLoop->getHeader())
- InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
+ InnerLoopPreHeader =
+ InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
// Adjust the loop preheader
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1422,8 +1451,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopHeaderSuccessor, DTUpdates);
// Adjust reduction PHI's now that the incoming block has changed.
- updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
- OuterLoopHeader);
+ InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
+ OuterLoopHeader);
updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
OuterLoopPreHeader, DTUpdates);
@@ -1452,10 +1481,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
OuterLoopPreHeader);
- moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+ moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
+ OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
// For PHIs in the exit block of the outer loop, outer's latch has been
// replaced by Inners'.
- updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+ OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
@@ -1482,10 +1512,10 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
}
// Update the incoming blocks for moved PHI nodes.
- updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
- updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
- updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
- updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
+ OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
+ OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
+ InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
+ InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
return true;
}
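
The hunks above drop the local updateIncomingBlock() helper in favour of BasicBlock::replacePhiUsesWith(), which performs the same PHI rewrite. A minimal sketch of the two equivalent forms, assuming only the IR headers (function names are illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// What the removed helper did by hand: retarget every incoming edge of every
// PHI in BB that referenced OldPred so it references NewPred instead.
static void updateIncomingBlockByHand(BasicBlock *BB, BasicBlock *OldPred,
                                      BasicBlock *NewPred) {
  for (PHINode &PHI : BB->phis())
    for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; ++I)
      if (PHI.getIncomingBlock(I) == OldPred)
        PHI.setIncomingBlock(I, NewPred);
}

// ...and the single library call it now becomes.
static void updateIncomingBlockViaAPI(BasicBlock *BB, BasicBlock *OldPred,
                                      BasicBlock *NewPred) {
  BB->replacePhiUsesWith(OldPred, NewPred);
}
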
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 19bd9ebcc15b..2b3d5e0ce9b7 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -1,9 +1,8 @@
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,10 +29,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -54,6 +57,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
@@ -159,8 +163,9 @@ namespace {
class LoadEliminationForLoop {
public:
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
- DominatorTree *DT)
- : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo* PSI)
+ : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
/// Look through the loop-carried and loop-independent dependences in
/// this loop and find store->load dependences.
@@ -428,9 +433,9 @@ public:
auto *PH = L->getLoopPreheader();
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
- Value *Initial =
- new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
- Cand.Load->getAlignment(), PH->getTerminator());
+ Value *Initial = new LoadInst(
+ Cand.Load->getType(), InitialPtr, "load_initial",
+ /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator());
PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
@@ -529,7 +534,17 @@ public:
}
if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
- if (L->getHeader()->getParent()->optForSize()) {
+ if (LAI.hasConvergentOp()) {
+ LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
+ "convergent calls\n");
+ return false;
+ }
+
+ auto *HeaderBB = L->getHeader();
+ auto *F = HeaderBB->getParent();
+ bool OptForSize = F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+ if (OptForSize) {
LLVM_DEBUG(
dbgs() << "Versioning is needed but not allowed when optimizing "
"for size.\n");
@@ -572,6 +587,8 @@ private:
LoopInfo *LI;
const LoopAccessInfo &LAI;
DominatorTree *DT;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
PredicatedScalarEvolution PSE;
};
@@ -579,6 +596,7 @@ private:
static bool
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
// Build up a worklist of inner-loops to transform to avoid iterator
// invalidation.
@@ -597,7 +615,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
bool Changed = false;
for (Loop *L : Worklist) {
// The actual work is performed by LoadEliminationForLoop.
- LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+ LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
Changed |= LEL.processLoop();
}
return Changed;
@@ -622,10 +640,14 @@ public:
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
// Process each loop nest in the function.
return eliminateLoadsAcrossLoops(
- F, LI, DT,
+ F, LI, DT, BFI, PSI,
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
@@ -638,6 +660,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
};
@@ -653,6 +677,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
FunctionPass *llvm::createLoopLoadEliminationPass() {
@@ -668,12 +694,18 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
- F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI,
- SE, TLI, TTI, nullptr};
+ F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
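
The versioning hunk above replaces the plain optForSize() test with a profile-aware one: versioning is blocked when the whole function is optsize, or when PSI/BFI judge the loop header cold enough to treat as optsize. A small sketch of that decision in isolation, mirroring the call the patch introduces (the wrapper name is illustrative):

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;

// Decide whether loop versioning should be suppressed for size. A null
// PSI/BFI simply disables the profile-based path, matching how the pass only
// computes BFI when a profile summary is present.
static bool suppressVersioningForSize(BasicBlock *HeaderBB,
                                      ProfileSummaryInfo *PSI,
                                      BlockFrequencyInfo *BFI) {
  Function *F = HeaderBB->getParent();
  return F->hasOptSize() || shouldOptimizeForSize(HeaderBB, PSI, BFI);
}
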
diff --git a/lib/Transforms/Scalar/LoopPassManager.cpp b/lib/Transforms/Scalar/LoopPassManager.cpp
index 774ad7b945a0..f3bfbd3564ab 100644
--- a/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -1,9 +1,8 @@
//===- LoopPassManager.cpp - Loop pass management -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 5983c804c0c1..507a1e251ca6 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -1,9 +1,8 @@
//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -179,6 +178,7 @@
#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -194,6 +194,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#define DEBUG_TYPE "loop-predication"
@@ -222,24 +223,31 @@ static cl::opt<float> LatchExitProbabilityScale(
cl::desc("scale factor for the latch probability. Value should be greater "
"than 1. Lower values are ignored"));
+static cl::opt<bool> PredicateWidenableBranchGuards(
+ "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
+ cl::desc("Whether or not we should predicate guards "
+ "expressed as widenable branches to deoptimize blocks"),
+ cl::init(true));
+
namespace {
-class LoopPredication {
- /// Represents an induction variable check:
- /// icmp Pred, <induction variable>, <loop invariant limit>
- struct LoopICmp {
- ICmpInst::Predicate Pred;
- const SCEVAddRecExpr *IV;
- const SCEV *Limit;
- LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
- const SCEV *Limit)
- : Pred(Pred), IV(IV), Limit(Limit) {}
- LoopICmp() {}
- void dump() {
- dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
- << ", Limit = " << *Limit << "\n";
- }
- };
+/// Represents an induction variable check:
+/// icmp Pred, <induction variable>, <loop invariant limit>
+struct LoopICmp {
+ ICmpInst::Predicate Pred;
+ const SCEVAddRecExpr *IV;
+ const SCEV *Limit;
+ LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+ const SCEV *Limit)
+ : Pred(Pred), IV(IV), Limit(Limit) {}
+ LoopICmp() {}
+ void dump() {
+ dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
+ << ", Limit = " << *Limit << "\n";
+ }
+};
+class LoopPredication {
+ AliasAnalysis *AA;
ScalarEvolution *SE;
BranchProbabilityInfo *BPI;
@@ -249,58 +257,53 @@ class LoopPredication {
LoopICmp LatchCheck;
bool isSupportedStep(const SCEV* Step);
- Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
- return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
- ICI->getOperand(1));
- }
- Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
- Value *RHS);
-
+ Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
Optional<LoopICmp> parseLoopLatchICmp();
- bool CanExpand(const SCEV* S);
- Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
- ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
- Instruction *InsertAt);
+ /// Return an insertion point suitable for inserting a safe to speculate
+ /// instruction whose only user will be 'User' which has operands 'Ops'. A
+  /// trivial result would be at the User itself, but we try to return a
+ /// loop invariant location if possible.
+ Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
+ /// Same as above, *except* that this uses the SCEV definition of invariant
+ /// which is that an expression *can be made* invariant via SCEVExpander.
+  /// Thus, this version is only suitable for finding an insert point to be
+ /// passed to SCEVExpander!
+ Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
+
+ /// Return true if the value is known to produce a single fixed value across
+ /// all iterations on which it executes. Note that this does not imply
+  /// speculation safety. That must be established separately.
+ bool isLoopInvariantValue(const SCEV* S);
+
+ Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
+ ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
LoopICmp RangeCheck,
SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
LoopICmp RangeCheck,
SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
+ unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
+ SCEVExpander &Expander, Instruction *Guard);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
-
+ bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
// If the loop always exits through another block in the loop, we should not
// predicate based on the latch check. For example, the latch check can be a
// very coarse grained check and there can be more fine grained exit checks
// within the loop. We identify such unprofitable loops through BPI.
bool isLoopProfitableToPredicate();
- // When the IV type is wider than the range operand type, we can still do loop
- // predication, by generating SCEVs for the range and latch that are of the
- // same type. We achieve this by generating a SCEV truncate expression for the
- // latch IV. This is done iff truncation of the IV is a safe operation,
- // without loss of information.
- // Another way to achieve this is by generating a wider type SCEV for the
- // range check operand, however, this needs a more involved check that
- // operands do not overflow. This can lead to loss of information when the
- // range operand is of the form: add i32 %offset, %iv. We need to prove that
- // sext(x + y) is same as sext(x) + sext(y).
- // This function returns true if we can safely represent the IV type in
- // the RangeCheckType without loss of information.
- bool isSafeToTruncateWideIVType(Type *RangeCheckType);
- // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
- // so.
- Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
-
public:
- LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
- : SE(SE), BPI(BPI){};
+ LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+ BranchProbabilityInfo *BPI)
+ : AA(AA), SE(SE), BPI(BPI){};
bool runOnLoop(Loop *L);
};
@@ -322,7 +325,8 @@ public:
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
BranchProbabilityInfo &BPI =
getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- LoopPredication LP(SE, &BPI);
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ LoopPredication LP(AA, SE, &BPI);
return LP.runOnLoop(L);
}
};
@@ -348,16 +352,19 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
- LoopPredication LP(&AR.SE, BPI);
+ LoopPredication LP(&AR.AA, &AR.SE, BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-Optional<LoopPredication::LoopICmp>
-LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+Optional<LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
+ auto Pred = ICI->getPredicate();
+ auto *LHS = ICI->getOperand(0);
+ auto *RHS = ICI->getOperand(1);
+
const SCEV *LHSS = SE->getSCEV(LHS);
if (isa<SCEVCouldNotCompute>(LHSS))
return None;
@@ -380,42 +387,98 @@ LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
}
Value *LoopPredication::expandCheck(SCEVExpander &Expander,
- IRBuilder<> &Builder,
+ Instruction *Guard,
ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, Instruction *InsertAt) {
- // TODO: we can check isLoopEntryGuardedByCond before emitting the check
-
+ const SCEV *RHS) {
Type *Ty = LHS->getType();
assert(Ty == RHS->getType() && "expandCheck operands have different types?");
- if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
- return Builder.getTrue();
+ if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
+ IRBuilder<> Builder(Guard);
+ if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+ return Builder.getTrue();
+ if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
+ LHS, RHS))
+ return Builder.getFalse();
+ }
- Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
- Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+ Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
+ Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
+ IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
return Builder.CreateICmp(Pred, LHSV, RHSV);
}
-Optional<LoopPredication::LoopICmp>
-LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
+
+// Returns true if it's safe to truncate the IV to RangeCheckType.
+// When the IV type is wider than the range operand type, we can still do loop
+// predication, by generating SCEVs for the range and latch that are of the
+// same type. We achieve this by generating a SCEV truncate expression for the
+// latch IV. This is done iff truncation of the IV is a safe operation,
+// without loss of information.
+// Another way to achieve this is by generating a wider type SCEV for the
+// range check operand, however, this needs a more involved check that
+// operands do not overflow. This can lead to loss of information when the
+// range operand is of the form: add i32 %offset, %iv. We need to prove that
+// sext(x + y) is same as sext(x) + sext(y).
+// This function returns true if we can safely represent the IV type in
+// the RangeCheckType without loss of information.
+static bool isSafeToTruncateWideIVType(const DataLayout &DL,
+ ScalarEvolution &SE,
+ const LoopICmp LatchCheck,
+ Type *RangeCheckType) {
+ if (!EnableIVTruncation)
+ return false;
+ assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
+ DL.getTypeSizeInBits(RangeCheckType) &&
+ "Expected latch check IV type to be larger than range check operand "
+ "type!");
+ // The start and end values of the IV should be known. This is to guarantee
+ // that truncating the wide type will not lose information.
+ auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
+ auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
+ if (!Limit || !Start)
+ return false;
+ // This check makes sure that the IV does not change sign during loop
+ // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
+ // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
+ // IV wraps around, and the truncation of the IV would lose the range of
+ // iterations between 2^32 and 2^64.
+ bool Increasing;
+ if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+ return false;
+ // The active bits should be less than the bits in the RangeCheckType. This
+ // guarantees that truncating the latch check to RangeCheckType is a safe
+ // operation.
+ auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
+ return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
+ Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
+}
+
+
+// Return a LoopICmp describing a latch check equivalent to LatchCheck but with
+// the requested type if safe to do so. May involve the use of a new IV.
+static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
+ ScalarEvolution &SE,
+ const LoopICmp LatchCheck,
+ Type *RangeCheckType) {
auto *LatchType = LatchCheck.IV->getType();
if (RangeCheckType == LatchType)
return LatchCheck;
// For now, bail out if latch type is narrower than range type.
- if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
+ if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
return None;
- if (!isSafeToTruncateWideIVType(RangeCheckType))
+ if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
return None;
// We can now safely identify the truncated version of the IV and limit for
// RangeCheckType.
LoopICmp NewLatchCheck;
NewLatchCheck.Pred = LatchCheck.Pred;
NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
- SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
+ SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
if (!NewLatchCheck.IV)
return None;
- NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
+ NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
<< "can be represented as range check type:"
<< *RangeCheckType << "\n");
@@ -428,13 +491,66 @@ bool LoopPredication::isSupportedStep(const SCEV* Step) {
return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
}
-bool LoopPredication::CanExpand(const SCEV* S) {
- return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+ ArrayRef<Value*> Ops) {
+ for (Value *Op : Ops)
+ if (!L->isLoopInvariant(Op))
+ return Use;
+ return Preheader->getTerminator();
+}
+
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+ ArrayRef<const SCEV*> Ops) {
+ // Subtlety: SCEV considers things to be invariant if the value produced is
+ // the same across iterations. This is not the same as being able to
+ // evaluate outside the loop, which is what we actually need here.
+ for (const SCEV *Op : Ops)
+ if (!SE->isLoopInvariant(Op, L) ||
+ !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
+ return Use;
+ return Preheader->getTerminator();
+}
+
+bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
+ // Handling expressions which produce invariant results, but *haven't* yet
+ // been removed from the loop serves two important purposes.
+ // 1) Most importantly, it resolves a pass ordering cycle which would
+  // otherwise need us to iterate licm, loop-predication, and either
+ // loop-unswitch or loop-peeling to make progress on examples with lots of
+ // predicable range checks in a row. (Since, in the general case, we can't
+ // hoist the length checks until the dominating checks have been discharged
+ // as we can't prove doing so is safe.)
+ // 2) As a nice side effect, this exposes the value of peeling or unswitching
+ // much more obviously in the IR. Otherwise, the cost modeling for other
+ // transforms would end up needing to duplicate all of this logic to model a
+ // check which becomes predictable based on a modeled peel or unswitch.
+ //
+ // The cost of doing so in the worst case is an extra fill from the stack in
+ // the loop to materialize the loop invariant test value instead of checking
+  // against the original IV, which is presumably in a register inside the loop.
+  // Such cases are presumably rare, and hint at missing opportunities for
+ // other passes.
+
+ if (SE->isLoopInvariant(S, L))
+    // Note: This is the SCEV variant, so the original Value* may be within the
+ // loop even though SCEV has proven it is loop invariant.
+ return true;
+
+  // Handle a particularly important case which SCEV doesn't yet know about,
+  // which shows up in range checks on arrays with immutable lengths.
+ // TODO: This should be sunk inside SCEV.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
+ if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
+ if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
+ LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ return true;
+ return false;
}
Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
- LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
- SCEVExpander &Expander, IRBuilder<> &Builder) {
+ LoopICmp LatchCheck, LoopICmp RangeCheck,
+ SCEVExpander &Expander, Instruction *Guard) {
auto *Ty = RangeCheck.IV->getType();
// Generate the widened condition for the forward loop:
// guardStart u< guardLimit &&
@@ -446,40 +562,61 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
const SCEV *GuardLimit = RangeCheck.Limit;
const SCEV *LatchStart = LatchCheck.IV->getStart();
const SCEV *LatchLimit = LatchCheck.Limit;
+ // Subtlety: We need all the values to be *invariant* across all iterations,
+ // but we only need to check expansion safety for those which *aren't*
+ // already guaranteed to dominate the guard.
+ if (!isLoopInvariantValue(GuardStart) ||
+ !isLoopInvariantValue(GuardLimit) ||
+ !isLoopInvariantValue(LatchStart) ||
+ !isLoopInvariantValue(LatchLimit)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
+ if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+ !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
// guardLimit - guardStart + latchStart - 1
const SCEV *RHS =
SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
- if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
- !CanExpand(LatchLimit) || !CanExpand(RHS)) {
- LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
- return None;
- }
auto LimitCheckPred =
ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");
-
- Instruction *InsertAt = Preheader->getTerminator();
+
auto *LimitCheck =
- expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS, InsertAt);
- auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred,
- GuardStart, GuardLimit, InsertAt);
+ expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
+ auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
+ GuardStart, GuardLimit);
+ IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
- LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
- SCEVExpander &Expander, IRBuilder<> &Builder) {
+ LoopICmp LatchCheck, LoopICmp RangeCheck,
+ SCEVExpander &Expander, Instruction *Guard) {
auto *Ty = RangeCheck.IV->getType();
const SCEV *GuardStart = RangeCheck.IV->getStart();
const SCEV *GuardLimit = RangeCheck.Limit;
+ const SCEV *LatchStart = LatchCheck.IV->getStart();
const SCEV *LatchLimit = LatchCheck.Limit;
- if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
- !CanExpand(LatchLimit)) {
+ // Subtlety: We need all the values to be *invariant* across all iterations,
+ // but we only need to check expansion safety for those which *aren't*
+ // already guaranteed to dominate the guard.
+ if (!isLoopInvariantValue(GuardStart) ||
+ !isLoopInvariantValue(GuardLimit) ||
+ !isLoopInvariantValue(LatchStart) ||
+ !isLoopInvariantValue(LatchLimit)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
+ if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+ !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
@@ -497,22 +634,35 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
// guardStart u< guardLimit &&
// latchLimit <pred> 1.
// See the header comment for reasoning of the checks.
- Instruction *InsertAt = Preheader->getTerminator();
auto LimitCheckPred =
ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
- auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
- GuardStart, GuardLimit, InsertAt);
- auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
- SE->getOne(Ty), InsertAt);
+ auto *FirstIterationCheck = expandCheck(Expander, Guard,
+ ICmpInst::ICMP_ULT,
+ GuardStart, GuardLimit);
+ auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
+ SE->getOne(Ty));
+ IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
+static void normalizePredicate(ScalarEvolution *SE, Loop *L,
+ LoopICmp& RC) {
+ // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
+ // ULT/UGE form for ease of handling by our caller.
+ if (ICmpInst::isEquality(RC.Pred) &&
+ RC.IV->getStepRecurrence(*SE)->isOne() &&
+ SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
+ RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
+ ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+}
+
+
/// If ICI can be widened to a loop invariant condition emits the loop
/// invariant condition in the loop preheader and return it, otherwise
/// returns None.
Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
SCEVExpander &Expander,
- IRBuilder<> &Builder) {
+ Instruction *Guard) {
LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
LLVM_DEBUG(ICI->dump());
@@ -545,7 +695,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
return None;
}
auto *Ty = RangeCheckIV->getType();
- auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
+ auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
if (!CurrLatchCheckOpt) {
LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
"corresponding to range type: "
@@ -566,34 +716,27 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
if (Step->isOne())
return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
- Expander, Builder);
+ Expander, Guard);
else {
assert(Step->isAllOnesValue() && "Step should be -1!");
return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
- Expander, Builder);
+ Expander, Guard);
}
}
-bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
- SCEVExpander &Expander) {
- LLVM_DEBUG(dbgs() << "Processing guard:\n");
- LLVM_DEBUG(Guard->dump());
-
- TotalConsidered++;
-
- IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
-
+unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
+ Value *Condition,
+ SCEVExpander &Expander,
+ Instruction *Guard) {
+ unsigned NumWidened = 0;
// The guard condition is expected to be in form of:
// cond1 && cond2 && cond3 ...
// Iterate over subconditions looking for icmp conditions which can be
// widened across loop iterations. When widening these conditions, remember
// the resulting list of subconditions in the Checks vector.
- SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+ SmallVector<Value *, 4> Worklist(1, Condition);
SmallPtrSet<Value *, 4> Visited;
-
- SmallVector<Value *, 4> Checks;
-
- unsigned NumWidened = 0;
+ Value *WideableCond = nullptr;
do {
Value *Condition = Worklist.pop_back_val();
if (!Visited.insert(Condition).second)
@@ -607,8 +750,16 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
continue;
}
+ if (match(Condition,
+ m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+ // Pick any, we don't care which
+ WideableCond = Condition;
+ continue;
+ }
+
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
- if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
+ Guard)) {
Checks.push_back(NewRangeCheck.getValue());
NumWidened++;
continue;
@@ -617,28 +768,70 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
// Save the condition as is if we can't widen it
Checks.push_back(Condition);
- } while (Worklist.size() != 0);
+ } while (!Worklist.empty());
+  // At the moment, our matching logic for widenable conditions implicitly
+  // assumes we preserve the form: (br (and Cond, WC())). FIXME
+  // Note that if there were multiple calls to the widenable condition in the
+ // traversal, we only need to keep one, and which one is arbitrary.
+ if (WideableCond)
+ Checks.push_back(WideableCond);
+ return NumWidened;
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+ SCEVExpander &Expander) {
+ LLVM_DEBUG(dbgs() << "Processing guard:\n");
+ LLVM_DEBUG(Guard->dump());
+ TotalConsidered++;
+ SmallVector<Value *, 4> Checks;
+ unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
+ Guard);
+ if (NumWidened == 0)
+ return false;
+
+ TotalWidened += NumWidened;
+
+ // Emit the new guard condition
+ IRBuilder<> Builder(findInsertPt(Guard, Checks));
+ Value *AllChecks = Builder.CreateAnd(Checks);
+ auto *OldCond = Guard->getOperand(0);
+ Guard->setOperand(0, AllChecks);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+ LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ return true;
+}
+
+bool LoopPredication::widenWidenableBranchGuardConditions(
+ BranchInst *BI, SCEVExpander &Expander) {
+ assert(isGuardAsWidenableBranch(BI) && "Must be!");
+ LLVM_DEBUG(dbgs() << "Processing guard:\n");
+ LLVM_DEBUG(BI->dump());
+
+ TotalConsidered++;
+ SmallVector<Value *, 4> Checks;
+ unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
+ Expander, BI);
if (NumWidened == 0)
return false;
TotalWidened += NumWidened;
// Emit the new guard condition
- Builder.SetInsertPoint(Guard);
- Value *LastCheck = nullptr;
- for (auto *Check : Checks)
- if (!LastCheck)
- LastCheck = Check;
- else
- LastCheck = Builder.CreateAnd(LastCheck, Check);
- Guard->setOperand(0, LastCheck);
+ IRBuilder<> Builder(findInsertPt(BI, Checks));
+ Value *AllChecks = Builder.CreateAnd(Checks);
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(AllChecks);
+ assert(isGuardAsWidenableBranch(BI) &&
+ "Stopped being a guard after transform?");
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
}
-Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
+Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
using namespace PatternMatch;
BasicBlock *LoopLatch = L->getLoopLatch();
@@ -647,27 +840,30 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
return None;
}
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
- BasicBlock *TrueDest, *FalseDest;
-
- if (!match(LoopLatch->getTerminator(),
- m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
- FalseDest))) {
+ auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+ if (!BI || !BI->isConditional()) {
LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
return None;
}
- assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
- "One of the latch's destinations must be the header");
- if (TrueDest != L->getHeader())
- Pred = ICmpInst::getInversePredicate(Pred);
-
- auto Result = parseLoopICmp(Pred, LHS, RHS);
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ assert(
+ (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
+ "One of the latch's destinations must be the header");
+
+ auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICI) {
+ LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
+ return None;
+ }
+ auto Result = parseLoopICmp(ICI);
if (!Result) {
LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
return None;
}
+ if (TrueDest != L->getHeader())
+ Result->Pred = ICmpInst::getInversePredicate(Result->Pred);
+
// Check affine first, so if it's not we don't try to compute the step
// recurrence.
if (!Result->IV->isAffine()) {
@@ -692,49 +888,22 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
}
};
+ normalizePredicate(SE, L, *Result);
if (IsUnsupportedPredicate(Step, Result->Pred)) {
LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
<< ")!\n");
return None;
}
+
return Result;
}
-// Returns true if its safe to truncate the IV to RangeCheckType.
-bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
- if (!EnableIVTruncation)
- return false;
- assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
- DL->getTypeSizeInBits(RangeCheckType) &&
- "Expected latch check IV type to be larger than range check operand "
- "type!");
- // The start and end values of the IV should be known. This is to guarantee
- // that truncating the wide type will not lose information.
- auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
- auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
- if (!Limit || !Start)
- return false;
- // This check makes sure that the IV does not change sign during loop
- // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
- // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
- // IV wraps around, and the truncation of the IV would lose the range of
- // iterations between 2^32 and 2^64.
- bool Increasing;
- if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
- return false;
- // The active bits should be less than the bits in the RangeCheckType. This
- // guarantees that truncating the latch check to RangeCheckType is a safe
- // operation.
- auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
- return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
- Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
-}
bool LoopPredication::isLoopProfitableToPredicate() {
if (SkipProfitabilityChecks || !BPI)
return true;
- SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8> ExitEdges;
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
L->getExitEdges(ExitEdges);
// If there is only one exiting edge in the loop, it is always profitable to
// predicate the loop.
@@ -795,7 +964,12 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
// There is nothing to do if the module doesn't use guards
auto *GuardDecl =
M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
- if (!GuardDecl || GuardDecl->use_empty())
+ bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
+ auto *WCDecl = M->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+ bool HasWidenableConditions =
+ PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
+ if (!HasIntrinsicGuards && !HasWidenableConditions)
return false;
DL = &M->getDataLayout();
@@ -819,12 +993,18 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
// Collect all the guards into a vector and process later, so as not
// to invalidate the instruction iterator.
SmallVector<IntrinsicInst *, 4> Guards;
- for (const auto BB : L->blocks())
+ SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
+ for (const auto BB : L->blocks()) {
for (auto &I : *BB)
if (isGuard(&I))
Guards.push_back(cast<IntrinsicInst>(&I));
+ if (PredicateWidenableBranchGuards &&
+ isGuardAsWidenableBranch(BB->getTerminator()))
+ GuardsAsWidenableBranches.push_back(
+ cast<BranchInst>(BB->getTerminator()));
+ }
- if (Guards.empty())
+ if (Guards.empty() && GuardsAsWidenableBranches.empty())
return false;
SCEVExpander Expander(*SE, *DL, "loop-predication");
@@ -832,6 +1012,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
bool Changed = false;
for (auto *Guard : Guards)
Changed |= widenGuardConditions(Guard, Expander);
+ for (auto *Guard : GuardsAsWidenableBranches)
+ Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
return Changed;
}
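
Besides the @llvm.experimental.guard intrinsic, the pass now also predicates guards expressed as widenable branches, i.e. conditional branches whose condition is (and %cond, %wc) with %wc produced by @llvm.experimental.widenable.condition and whose false edge deoptimizes. A rough sketch of how that condition shape can be recognized with PatternMatch; the real check is isGuardAsWidenableBranch() from llvm/Analysis/GuardUtils.h, which additionally verifies the deoptimizing successor:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Approximates the branch shape the pass now treats as a guard:
//   br i1 (and i1 %cond, i1 %wc), label %guarded, label %deopt
// This sketch only inspects the condition, not the %deopt successor.
static bool looksLikeWidenableBranchGuard(Instruction *Term) {
  auto *BI = dyn_cast<BranchInst>(Term);
  if (!BI || !BI->isConditional())
    return false;
  return match(
      BI->getCondition(),
      m_c_And(m_Value(),
              m_Intrinsic<Intrinsic::experimental_widenable_condition>()));
}
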
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 9a99e5925572..166b57f20b43 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1,9 +1,8 @@
//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -892,12 +891,22 @@ bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
if (!ADR)
return false;
+
+ // Check that the first root is evenly spaced.
unsigned N = DRS.Roots.size() + 1;
const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
return false;
+  // Check that the remaining roots are evenly spaced.
+ for (unsigned i = 1; i < N - 1; ++i) {
+ const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
+ SE->getSCEV(DRS.Roots[i-1]));
+ if (NewStepSCEV != StepSCEV)
+ return false;
+ }
+
return true;
}
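
The new loop in validateRootSet() strengthens the reroll legality check: it is no longer enough for the first root to sit one step away from the base; every consecutive pair of roots must be separated by that same step. A toy model of the requirement with plain integers standing in for SCEVs (names are illustrative):

#include <cstddef>
#include <vector>

// Base plus its roots must form an arithmetic progression with one common
// step; the extra loop added above now verifies this for every consecutive
// pair of roots rather than just the first one.
static bool rootsEvenlySpaced(long Base, const std::vector<long> &Roots) {
  if (Roots.empty())
    return false;
  long Step = Roots[0] - Base;        // corresponds to StepSCEV in the pass
  for (std::size_t I = 1; I < Roots.size(); ++I)
    if (Roots[I] - Roots[I - 1] != Step)
      return false;
  return true;
}
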
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fd22128f7fe6..e009947690af 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -1,9 +1,8 @@
//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,7 +54,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
if (AR.MSSA && VerifyMemorySSA)
AR.MSSA->verifyMemorySSA();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 2e5927f9a068..046f4c8af492 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -1,9 +1,8 @@
//===--------- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,7 +29,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -42,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplifycfg"
static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding",
- cl::init(false));
+ cl::init(true));
STATISTIC(NumTerminatorsFolded,
"Number of terminators folded to unconditional branches");
@@ -80,6 +79,36 @@ static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) {
return nullptr;
}
+/// Removes \p BB from all loops from [FirstLoop, LastLoop) in the parent chain.
+static void removeBlockFromLoops(BasicBlock *BB, Loop *FirstLoop,
+ Loop *LastLoop = nullptr) {
+ assert((!LastLoop || LastLoop->contains(FirstLoop->getHeader())) &&
+ "First loop is supposed to be inside of last loop!");
+ assert(FirstLoop->contains(BB) && "Must be a loop block!");
+ for (Loop *Current = FirstLoop; Current != LastLoop;
+ Current = Current->getParentLoop())
+ Current->removeBlockFromLoop(BB);
+}
+
+/// Find innermost loop that contains at least one block from \p BBs and
+/// contains the header of loop \p L.
+static Loop *getInnermostLoopFor(SmallPtrSetImpl<BasicBlock *> &BBs,
+ Loop &L, LoopInfo &LI) {
+ Loop *Innermost = nullptr;
+ for (BasicBlock *BB : BBs) {
+ Loop *BBL = LI.getLoopFor(BB);
+ while (BBL && !BBL->contains(L.getHeader()))
+ BBL = BBL->getParentLoop();
+ if (BBL == &L)
+ BBL = BBL->getParentLoop();
+ if (!BBL)
+ continue;
+ if (!Innermost || BBL->getLoopDepth() > Innermost->getLoopDepth())
+ Innermost = BBL;
+ }
+ return Innermost;
+}
+
namespace {
/// Helper class that can turn branches and switches with constant conditions
/// into unconditional branches.
@@ -90,6 +119,9 @@ private:
DominatorTree &DT;
ScalarEvolution &SE;
MemorySSAUpdater *MSSAU;
+ LoopBlocksDFS DFS;
+ DomTreeUpdater DTU;
+ SmallVector<DominatorTree::UpdateType, 16> DTUpdates;
// Whether or not the current loop has irreducible CFG.
bool HasIrreducibleCFG = false;
@@ -175,7 +207,6 @@ private:
/// Fill all information about status of blocks and exits of the current loop
/// if constant folding of all branches will be done.
void analyze() {
- LoopBlocksDFS DFS(&L);
DFS.perform(&LI);
assert(DFS.isComplete() && "DFS is expected to be finished");
@@ -208,12 +239,13 @@ private:
// folding. Only handle blocks from current loop: branches in child loops
// are skipped because if they can be folded, they should be folded during
// the processing of child loops.
- if (TheOnlySucc && LI.getLoopFor(BB) == &L)
+ bool TakeFoldCandidate = TheOnlySucc && LI.getLoopFor(BB) == &L;
+ if (TakeFoldCandidate)
FoldCandidates.push_back(BB);
// Handle successors.
for (BasicBlock *Succ : successors(BB))
- if (!TheOnlySucc || TheOnlySucc == Succ) {
+ if (!TakeFoldCandidate || TheOnlySucc == Succ) {
if (L.contains(Succ))
LiveLoopBlocks.insert(Succ);
else
@@ -229,8 +261,10 @@ private:
// Now, all exit blocks that are not marked as live are dead.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 8> UniqueDeadExits;
for (auto *ExitBlock : ExitBlocks)
- if (!LiveExitBlocks.count(ExitBlock))
+ if (!LiveExitBlocks.count(ExitBlock) &&
+ UniqueDeadExits.insert(ExitBlock).second)
DeadExitBlocks.push_back(ExitBlock);
// Whether or not the edge From->To will still be present in graph after the
@@ -239,7 +273,7 @@ private:
if (!LiveLoopBlocks.count(From))
return false;
BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From);
- return !TheOnlySucc || TheOnlySucc == To;
+ return !TheOnlySucc || TheOnlySucc == To || LI.getLoopFor(From) != &L;
};
// The loop will not be destroyed if its latch is live.
@@ -317,14 +351,10 @@ private:
// Construct split preheader and the dummy switch to thread edges from it to
// dead exits.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
BasicBlock *Preheader = L.getLoopPreheader();
- BasicBlock *NewPreheader = Preheader->splitBasicBlock(
- Preheader->getTerminator(),
- Twine(Preheader->getName()).concat("-split"));
- DTU.deleteEdge(Preheader, L.getHeader());
- DTU.insertEdge(NewPreheader, L.getHeader());
- DTU.insertEdge(Preheader, NewPreheader);
+ BasicBlock *NewPreheader = llvm::SplitBlock(
+ Preheader, Preheader->getTerminator(), &DT, &LI, MSSAU);
+
IRBuilder<> Builder(Preheader->getTerminator());
SwitchInst *DummySwitch =
Builder.CreateSwitch(Builder.getInt32(0), NewPreheader);
@@ -343,75 +373,106 @@ private:
}
assert(DummyIdx != 0 && "Too many dead exits!");
DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
- DTU.insertEdge(Preheader, BB);
+ DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
- OuterLoop->addBasicBlockToLoop(NewPreheader, LI);
-
// When we break dead edges, the outer loop may become unreachable from
// the current loop. We need to fix loop info accordingly. For this, we
// find the most nested loop that still contains L and remove L from all
// loops that are inside of it.
- Loop *StillReachable = nullptr;
- for (BasicBlock *BB : LiveExitBlocks) {
- Loop *BBL = LI.getLoopFor(BB);
- if (BBL && BBL->contains(L.getHeader()))
- if (!StillReachable ||
- BBL->getLoopDepth() > StillReachable->getLoopDepth())
- StillReachable = BBL;
- }
+ Loop *StillReachable = getInnermostLoopFor(LiveExitBlocks, L, LI);
// Okay, our loop is no longer in the outer loop (and maybe not in some of
// its parents as well). Make the fixup.
if (StillReachable != OuterLoop) {
LI.changeLoopFor(NewPreheader, StillReachable);
- for (Loop *NotContaining = OuterLoop; NotContaining != StillReachable;
- NotContaining = NotContaining->getParentLoop()) {
- NotContaining->removeBlockFromLoop(NewPreheader);
- for (auto *BB : L.blocks())
- NotContaining->removeBlockFromLoop(BB);
- }
+ removeBlockFromLoops(NewPreheader, OuterLoop, StillReachable);
+ for (auto *BB : L.blocks())
+ removeBlockFromLoops(BB, OuterLoop, StillReachable);
OuterLoop->removeChildLoop(&L);
if (StillReachable)
StillReachable->addChildLoop(&L);
else
LI.addTopLevelLoop(&L);
+
+ // Some values from loops in [OuterLoop, StillReachable) could be used
+ // in the current loop. Now it is not their child anymore, so such uses
+ // require LCSSA Phis.
+ Loop *FixLCSSALoop = OuterLoop;
+ while (FixLCSSALoop->getParentLoop() != StillReachable)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+ assert(FixLCSSALoop && "Should be a loop!");
+ // We need all DT updates to be done before forming LCSSA.
+ DTU.applyUpdates(DTUpdates);
+ if (MSSAU)
+ MSSAU->applyUpdates(DTUpdates, DT);
+ DTUpdates.clear();
+ formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE);
}
}
+
+ if (MSSAU) {
+ // Clear all updates now. Facilitates deletes that follow.
+ DTU.applyUpdates(DTUpdates);
+ MSSAU->applyUpdates(DTUpdates, DT);
+ DTUpdates.clear();
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
}
/// Delete loop blocks that have become unreachable after folding. Make all
/// relevant updates to DT and LI.
void deleteDeadLoopBlocks() {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (MSSAU) {
- SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
- DeadLoopBlocks.end());
+ SmallSetVector<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+ DeadLoopBlocks.end());
MSSAU->removeBlocks(DeadLoopBlocksSet);
}
+
+ // The function LI.erase has some invariants that need to be preserved when
+ // it tries to remove a loop which is not the top-level loop. In particular,
+  // it requires the loop's preheader to be strictly inside the loop's parent.
+  // We cannot just remove blocks one by one, because after removing the
+  // preheader we may break this invariant for the dead loop. So we detach and
+  // erase all dead loops beforehand.
+ for (auto *BB : DeadLoopBlocks)
+ if (LI.isLoopHeader(BB)) {
+ assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
+ Loop *DL = LI.getLoopFor(BB);
+ if (DL->getParentLoop()) {
+ for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop())
+ for (auto *BB : DL->getBlocks())
+ PL->removeBlockFromLoop(BB);
+ DL->getParentLoop()->removeChildLoop(DL);
+ LI.addTopLevelLoop(DL);
+ }
+ LI.erase(DL);
+ }
+
for (auto *BB : DeadLoopBlocks) {
assert(BB != L.getHeader() &&
"Header of the current loop cannot be dead!");
LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName()
<< "\n");
- if (LI.isLoopHeader(BB)) {
- assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
- LI.erase(LI.getLoopFor(BB));
- }
LI.removeBlock(BB);
- DeleteDeadBlock(BB, &DTU);
- ++NumLoopBlocksDeleted;
}
+
+ DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+ for (auto *BB : DeadLoopBlocks)
+ DTU.deleteBB(BB);
+
+ NumLoopBlocksDeleted += DeadLoopBlocks.size();
}
/// Constant-fold terminators of blocks accumulated in FoldCandidates into the
/// unconditional branches.
void foldTerminators() {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-
for (BasicBlock *BB : FoldCandidates) {
assert(LI.getLoopFor(BB) == &L && "Should be a loop block!");
BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
@@ -453,7 +514,7 @@ private:
Term->eraseFromParent();
for (auto *DeadSucc : DeadSuccessors)
- DTU.deleteEdge(BB, DeadSucc);
+ DTUpdates.push_back({DominatorTree::Delete, BB, DeadSucc});
++NumTerminatorsFolded;
}
@@ -463,15 +524,18 @@ public:
ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT,
ScalarEvolution &SE,
MemorySSAUpdater *MSSAU)
- : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU) {}
+ : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU), DFS(&L),
+ DTU(DT, DomTreeUpdater::UpdateStrategy::Eager) {}
bool run() {
assert(L.getLoopLatch() && "Should be single latch!");
// Collect all available information about status of blocks after constant
// folding.
analyze();
+ BasicBlock *Header = L.getHeader();
+ (void)Header;
- LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
+ LLVM_DEBUG(dbgs() << "In function " << Header->getParent()->getName()
<< ": ");
if (HasIrreducibleCFG) {
@@ -483,7 +547,7 @@ public:
if (FoldCandidates.empty()) {
LLVM_DEBUG(
dbgs() << "No constant terminator folding candidates found in loop "
- << L.getHeader()->getName() << "\n");
+ << Header->getName() << "\n");
return false;
}
@@ -491,8 +555,7 @@ public:
if (DeleteCurrentLoop) {
LLVM_DEBUG(
dbgs()
- << "Give up constant terminator folding in loop "
- << L.getHeader()->getName()
+ << "Give up constant terminator folding in loop " << Header->getName()
<< ": we don't currently support deletion of the current loop.\n");
return false;
}
@@ -503,8 +566,7 @@ public:
L.getNumBlocks()) {
LLVM_DEBUG(
dbgs() << "Give up constant terminator folding in loop "
- << L.getHeader()->getName()
- << ": we don't currently"
+ << Header->getName() << ": we don't currently"
" support blocks that are not dead, but will stop "
"being a part of the loop after constant-folding.\n");
return false;
@@ -515,8 +577,7 @@ public:
LLVM_DEBUG(dump());
LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size()
- << " terminators in loop " << L.getHeader()->getName()
- << "\n");
+ << " terminators in loop " << Header->getName() << "\n");
// Make the actual transforms.
handleDeadExits();
@@ -524,20 +585,36 @@ public:
if (!DeadLoopBlocks.empty()) {
LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size()
- << " dead blocks in loop " << L.getHeader()->getName()
- << "\n");
+ << " dead blocks in loop " << Header->getName() << "\n");
deleteDeadLoopBlocks();
+ } else {
+ // If we didn't do updates inside deleteDeadLoopBlocks, do them here.
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
#ifndef NDEBUG
// Make sure that we have preserved all data structures after the transform.
- DT.verify();
- assert(DT.isReachableFromEntry(L.getHeader()));
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full) &&
+ "DT broken after transform!");
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast) &&
+ "DT broken after transform!");
+#endif
+ assert(DT.isReachableFromEntry(Header));
LI.verify(DT);
#endif
return true;
}
+
+ bool foldingBreaksCurrentLoop() const {
+ return DeleteCurrentLoop;
+ }
};
} // namespace
@@ -545,7 +622,8 @@ public:
/// branches.
static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ bool &IsLoopDeleted) {
if (!EnableTermFolding)
return false;
@@ -555,7 +633,9 @@ static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU);
- return BranchFolder.run();
+ bool Changed = BranchFolder.run();
+ IsLoopDeleted = Changed && BranchFolder.foldingBreaksCurrentLoop();
+ return Changed;
}
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
@@ -587,11 +667,15 @@ static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
}
static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution &SE, MemorySSAUpdater *MSSAU,
+ bool &isLoopDeleted) {
bool Changed = false;
// Constant-fold terminators with known constant conditions.
- Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU);
+ Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU, isLoopDeleted);
+
+ if (isLoopDeleted)
+ return true;
// Eliminate unconditional branches by merging blocks into their predecessors.
Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);
@@ -604,15 +688,23 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+ LPMUpdater &LPMU) {
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency && AR.MSSA)
MSSAU = MemorySSAUpdater(AR.MSSA);
+ bool DeleteCurrentLoop = false;
if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ if (DeleteCurrentLoop)
+ LPMU.markLoopAsDeleted(L, "loop-simplifycfg");
+
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
@@ -623,7 +715,7 @@ public:
initializeLoopSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &) override {
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
return false;
@@ -637,8 +729,13 @@ public:
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
}
- return simplifyLoopCFG(*L, DT, LI, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ bool DeleteCurrentLoop = false;
+ bool Changed = simplifyLoopCFG(
+ *L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop);
+ if (DeleteCurrentLoop)
+ LPM.markLoopAsDeleted(*L);
+ return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 2f7ad2126ed3..975452e13f09 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -1,9 +1,8 @@
//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -291,10 +290,9 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
ColdLoopBBs.push_back(B);
LoopBlockNumber[B] = ++i;
}
- std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(),
- [&](BasicBlock *A, BasicBlock *B) {
- return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
- });
+ llvm::stable_sort(ColdLoopBBs, [&](BasicBlock *A, BasicBlock *B) {
+ return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
+ });
// Traverse preheader's instructions in reverse order because if A depends
// on B (A appears after B), A needs to be sunk first before B can be
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 773ffb9df0a2..59a387a186b8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1,9 +1,8 @@
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -116,6 +115,7 @@
#include <cstdlib>
#include <iterator>
#include <limits>
+#include <numeric>
#include <map>
#include <utility>
@@ -155,11 +155,19 @@ static cl::opt<bool> FilterSameScaledReg(
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
+static cl::opt<bool> EnableBackedgeIndexing(
+ "lsr-backedge-indexing", cl::Hidden, cl::init(true),
+ cl::desc("Enable the generation of cross iteration indexed memops"));
+
static cl::opt<unsigned> ComplexityLimit(
"lsr-complexity-limit", cl::Hidden,
cl::init(std::numeric_limits<uint16_t>::max()),
cl::desc("LSR search space complexity limit"));
+static cl::opt<unsigned> SetupCostDepthLimit(
+ "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
+ cl::desc("The limit on recursion depth for LSRs setup cost"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1007,10 +1015,15 @@ namespace {
/// This class is used to measure and compare candidate formulae.
class Cost {
+ const Loop *L = nullptr;
+ ScalarEvolution *SE = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
TargetTransformInfo::LSRCost C;
public:
- Cost() {
+ Cost() = delete;
+ Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI) :
+ L(L), SE(&SE), TTI(&TTI) {
C.Insns = 0;
C.NumRegs = 0;
C.AddRecCost = 0;
@@ -1021,7 +1034,7 @@ public:
C.ScaleCost = 0;
}
- bool isLess(Cost &Other, const TargetTransformInfo &TTI);
+ bool isLess(Cost &Other);
void Lose();
@@ -1040,12 +1053,9 @@ public:
return C.NumRegs == ~0u;
}
- void RateFormula(const TargetTransformInfo &TTI,
- const Formula &F,
+ void RateFormula(const Formula &F,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
@@ -1053,17 +1063,11 @@ public:
void dump() const;
private:
- void RateRegister(const SCEV *Reg,
- SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- const TargetTransformInfo &TTI);
- void RatePrimaryRegister(const SCEV *Reg,
+ void RateRegister(const Formula &F, const SCEV *Reg,
+ SmallPtrSetImpl<const SCEV *> &Regs);
+ void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs,
- const TargetTransformInfo &TTI);
+ SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
/// An operand value in an instruction which is to be replaced with some
@@ -1208,19 +1212,36 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
+static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
+ if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
+ return 1;
+ if (Depth == 0)
+ return 0;
+ if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
+ return getSetupCost(S->getStart(), Depth - 1);
+ if (auto S = dyn_cast<SCEVCastExpr>(Reg))
+ return getSetupCost(S->getOperand(), Depth - 1);
+ if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
+ return std::accumulate(S->op_begin(), S->op_end(), 0,
+ [&](unsigned i, const SCEV *Reg) {
+ return i + getSetupCost(Reg, Depth - 1);
+ });
+ if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
+ return getSetupCost(S->getLHS(), Depth - 1) +
+ getSetupCost(S->getRHS(), Depth - 1);
+ return 0;
+}
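
The depth-limited recursion added above can be modelled outside LLVM as a plain
fold over an expression tree. The following is only a sketch for illustration:
the Expr struct and the setupCost name are hypothetical stand-ins for SCEV node
classes, not real LLVM API.

#include <memory>
#include <numeric>
#include <vector>

struct Expr {
  bool IsLeaf = false;                     // stands in for SCEVUnknown/SCEVConstant
  std::vector<std::shared_ptr<Expr>> Ops;  // stands in for SCEVNAryExpr operands
};

// Count leaf registers that need a setup instruction, giving up (returning 0
// for the remaining subtree) once the depth budget is exhausted, mirroring
// the getSetupCost recursion above.
static unsigned setupCost(const Expr &E, unsigned Depth) {
  if (E.IsLeaf)
    return 1;
  if (Depth == 0)
    return 0;
  return std::accumulate(
      E.Ops.begin(), E.Ops.end(), 0u,
      [&](unsigned Acc, const std::shared_ptr<Expr> &Op) {
        return Acc + setupCost(*Op, Depth - 1);
      });
}
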
+
/// Tally up interesting quantities from the given register.
-void Cost::RateRegister(const SCEV *Reg,
- SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- const TargetTransformInfo &TTI) {
+void Cost::RateRegister(const Formula &F, const SCEV *Reg,
+ SmallPtrSetImpl<const SCEV *> &Regs) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// If this is an addrec for another loop, it should be an invariant
// with respect to L since L is the innermost loop (at least
// for now LSR only handles innermost loops).
if (AR->getLoop() != L) {
// If the AddRec exists, consider its register free and leave it alone.
- if (isExistingPhi(AR, SE))
+ if (isExistingPhi(AR, *SE))
return;
// It is bad to allow LSR for current loop to add induction variables
@@ -1236,16 +1257,24 @@ void Cost::RateRegister(const SCEV *Reg,
}
unsigned LoopCost = 1;
- if (TTI.shouldFavorPostInc()) {
- const SCEV *LoopStep = AR->getStepRecurrence(SE);
- if (isa<SCEVConstant>(LoopStep)) {
- // Check if a post-indexed load/store can be used.
- if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
- TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+ if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
+ TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
+
+ // If the step size matches the base offset, we could use pre-indexed
+ // addressing.
+ if (TTI->shouldFavorBackedgeIndex(L)) {
+ if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
+ if (Step->getAPInt() == F.BaseOffset)
+ LoopCost = 0;
+ }
+
+ if (TTI->shouldFavorPostInc()) {
+ const SCEV *LoopStep = AR->getStepRecurrence(*SE);
+ if (isa<SCEVConstant>(LoopStep)) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) &&
- SE.isLoopInvariant(LoopStart, L))
- LoopCost = 0;
+ SE->isLoopInvariant(LoopStart, L))
+ LoopCost = 0;
}
}
}
@@ -1255,7 +1284,7 @@ void Cost::RateRegister(const SCEV *Reg,
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
- RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
+ RateRegister(F, AR->getOperand(1), Regs);
if (isLoser())
return;
}
@@ -1265,43 +1294,34 @@ void Cost::RateRegister(const SCEV *Reg,
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
- if (!isa<SCEVUnknown>(Reg) &&
- !isa<SCEVConstant>(Reg) &&
- !(isa<SCEVAddRecExpr>(Reg) &&
- (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
- isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
- ++C.SetupCost;
+ C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
+ // Ensure we don't, even with the recursion limit, produce invalid costs.
+ C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
- SE.hasComputableLoopEvolution(Reg, L);
+ SE->hasComputableLoopEvolution(Reg, L);
}
/// Record this register in the set. If we haven't seen it before, rate
/// it. Optional LoserRegs provides a way to declare any formula that refers to
/// one of those regs an instant loser.
-void Cost::RatePrimaryRegister(const SCEV *Reg,
+void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs,
- const TargetTransformInfo &TTI) {
+ SmallPtrSetImpl<const SCEV *> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
if (Regs.insert(Reg).second) {
- RateRegister(Reg, Regs, L, SE, DT, TTI);
+ RateRegister(F, Reg, Regs);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
-void Cost::RateFormula(const TargetTransformInfo &TTI,
- const Formula &F,
+void Cost::RateFormula(const Formula &F,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
@@ -1314,7 +1334,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
+ RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
if (isLoser())
return;
}
@@ -1323,7 +1343,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
+ RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
if (isLoser())
return;
}
@@ -1334,11 +1354,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Do not count the base and a possible second register if the target
// allows to fold 2 registers.
C.NumBaseAdds +=
- NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
C.NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
- C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+ C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L);
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
@@ -1353,7 +1373,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Check with target if this offset with this instruction is
// specifically not supported.
if (LU.Kind == LSRUse::Address && Offset != 0 &&
- !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
C.NumBaseAdds++;
}
@@ -1366,7 +1386,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
// additional instruction (at least fill).
- unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+ unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
if (C.NumRegs > TTIRegNum) {
// Cost already exceeded TTIRegNum, then only newly added register can add
// new instructions.
@@ -1386,7 +1406,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
//
// For {-10, +, 1}:
// i = i + 1;
- if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
+ if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
+ !TTI->canMacroFuseCmp())
C.Insns++;
// Each new AddRec adds 1 instruction to calculation.
C.Insns += (C.AddRecCost - PrevAddRecCost);
@@ -1410,11 +1431,11 @@ void Cost::Lose() {
}
/// Choose the lower cost.
-bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+bool Cost::isLess(Cost &Other) {
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
C.Insns != Other.C.Insns)
return C.Insns < Other.C.Insns;
- return TTI.isLSRCostLess(C, Other.C);
+ return TTI->isLSRCostLess(C, Other.C);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1888,8 +1909,11 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
+ AssumptionCache &AC;
+ TargetLibraryInfo &LibInfo;
const TargetTransformInfo &TTI;
Loop *const L;
+ bool FavorBackedgeIndex = false;
bool Changed = false;
/// This is the insert position that the current loop's induction variable
@@ -1910,7 +1934,7 @@ class LSRInstance {
SmallSetVector<Type *, 4> Types;
/// The list of interesting uses.
- SmallVector<LSRUse, 16> Uses;
+ mutable SmallVector<LSRUse, 16> Uses;
/// Track which uses use which register candidates.
RegUseTracker RegUses;
@@ -2025,7 +2049,8 @@ class LSRInstance {
public:
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
- LoopInfo &LI, const TargetTransformInfo &TTI);
+ LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo);
bool getChanged() const { return Changed; }
@@ -2804,7 +2829,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
- ScalarEvolution &SE, const TargetTransformInfo &TTI) {
+ ScalarEvolution &SE) {
if (StressIVChain)
return true;
@@ -3064,7 +3089,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
+ ChainUsersVec[UsersIdx].FarUsers, SE))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -3078,7 +3103,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3100,7 +3125,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
- IncOffset, /*HaseBaseReg=*/false))
+ IncOffset, /*HasBaseReg=*/false))
return false;
return true;
@@ -3210,6 +3235,9 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
+ BranchInst *ExitBranch = nullptr;
+ bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &LibInfo);
+
for (const IVStrideUse &U : IU) {
Instruction *UserInst = U.getUser();
// Skip IV users that are part of profitable IV Chains.
@@ -3239,6 +3267,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
if (CI->isEquality()) {
+ // If CI can be saved in some target, e.g. replaced inside a hardware loop
+ // on PowerPC, there is no need to generate initial formulae for it.
+ if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
+ continue;
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
@@ -3738,10 +3770,11 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
- const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
- for (int64_t Offset : Worklist) {
+
+ auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
Formula F = Base;
F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+
if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
@@ -3761,7 +3794,35 @@ void LSRInstance::GenerateConstantOffsetsImpl(
(void)InsertFormula(LU, LUIdx, F);
}
+ };
+
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+
+ // With constant offsets and constant steps, we can generate pre-inc
+ // accesses by having the offset equal the step. So, for access #0 with a
+ // step of 8, we generate a G - 8 base which would require the first access
+ // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
+ // for itself and hopefully becomes the base for other accesses. This means
+ // that a single pre-indexed access can be generated to become the new
+ // base pointer for each iteration of the loop, resulting in no extra add/sub
+ // instructions for pointer updating.
+ if (FavorBackedgeIndex && LU.Kind == LSRUse::Address) {
+ if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
+ if (auto *StepRec =
+ dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
+ const APInt &StepInt = StepRec->getAPInt();
+ int64_t Step = StepInt.isNegative() ?
+ StepInt.getSExtValue() : StepInt.getZExtValue();
+
+ for (int64_t Offset : Worklist) {
+ Offset -= Step;
+ GenerateOffset(G, Offset);
+ }
+ }
+ }
}
+ for (int64_t Offset : Worklist)
+ GenerateOffset(G, Offset);
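
A toy model of the offset generation just added, kept outside LLVM for
illustration only (the candidateOffsets name and plain containers are made up;
G is the base SCEV discussed in the comment above): each worklist offset is
also tried shifted back by one step so that the resulting base, e.g. G - 8 for
offset 0 and step 8, can be advanced by a pre-indexed access each iteration.

#include <cstdint>
#include <vector>

std::vector<int64_t> candidateOffsets(const std::vector<int64_t> &Worklist,
                                      int64_t Step, bool FavorBackedgeIndex) {
  std::vector<int64_t> Out;
  if (FavorBackedgeIndex)
    for (int64_t Offset : Worklist)
      Out.push_back(Offset - Step); // e.g. offset 0, step 8 -> base of G - 8
  for (int64_t Offset : Worklist)
    Out.push_back(Offset);          // the original offsets are still tried
  return Out;
}
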
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
@@ -3968,9 +4029,27 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
- if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
- for (const SCEV *&BaseReg : F.BaseRegs)
- BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+ // Sometimes SCEV is able to prove zero during ext transform. It may
+ // happen if SCEV did not do all possible transforms while creating the
+ // initial node (maybe due to depth limitations), but it can do them while
+ // taking ext.
+ if (F.ScaledReg) {
+ const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+ if (NewScaledReg->isZero())
+ continue;
+ F.ScaledReg = NewScaledReg;
+ }
+ bool HasZeroBaseReg = false;
+ for (const SCEV *&BaseReg : F.BaseRegs) {
+ const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+ if (NewBaseReg->isZero()) {
+ HasZeroBaseReg = true;
+ break;
+ }
+ BaseReg = NewBaseReg;
+ }
+ if (HasZeroBaseReg)
+ continue;
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
@@ -4067,11 +4146,17 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// Conservatively examine offsets between this orig reg a few selected
// other orig regs.
+ int64_t First = Imms.begin()->first;
+ int64_t Last = std::prev(Imms.end())->first;
+ // Compute (First + Last) / 2 without overflow using the fact that
+ // First + Last = 2 * (First & Last) + (First ^ Last).
+ int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
+ // If the result is negative and First is odd and Last even (or vice versa),
+ // we rounded towards -inf. Add 1 in that case, to round towards 0.
+ Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
ImmMapTy::const_iterator OtherImms[] = {
- Imms.begin(), std::prev(Imms.end()),
- Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
- 2)
- };
+ Imms.begin(), std::prev(Imms.end()),
+ Imms.lower_bound(Avg)};
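
A minimal standalone sketch of the overflow-free midpoint computed above. The
function name is made up for illustration, and it assumes the usual arithmetic
right shift on negative int64_t values, as the code in the patch does: the AND
collects the carry bits, the XOR the sum bits, so First + Last never has to be
formed directly.

#include <cstdint>

static int64_t midpointTowardZero(int64_t First, int64_t Last) {
  // (First + Last) / 2 without overflow; rounds towards -inf so far.
  int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
  // If the exact sum is odd and the result negative, add 1 to round towards 0.
  Avg += (First ^ Last) & ((uint64_t)Avg >> 63);
  return Avg; // e.g. midpointTowardZero(INT64_MAX, INT64_MAX - 2) == INT64_MAX - 1
}
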
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
@@ -4249,9 +4334,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// avoids the need to recompute this information across formulae using the
// same bad AddRec. Passing LoserRegs is also essential unless we remove
// the corresponding bad register from the Regs set.
- Cost CostF;
+ Cost CostF(L, SE, TTI);
Regs.clear();
- CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
+ CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
// by uses within other loops that have some non-trivial address mode or
@@ -4282,10 +4367,10 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Formula &Best = LU.Formulae[P.first->second];
- Cost CostBest;
+ Cost CostBest(L, SE, TTI);
Regs.clear();
- CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
- if (CostF.isLess(CostBest, TTI))
+ CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
+ if (CostF.isLess(CostBest))
std::swap(F, Best);
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -4357,7 +4442,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.BaseOffset += C->getValue()->getSExtValue();
+ // FIXME: Formulas should store bitwidth to do wrapping properly.
+ // See PR41034.
+ NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -4400,7 +4487,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
LLVM_DEBUG(
@@ -4533,12 +4620,13 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
// If the new register numbers are the same, choose the Formula with
// less Cost.
- Cost CostFA, CostFB;
+ Cost CostFA(L, SE, TTI);
+ Cost CostFB(L, SE, TTI);
Regs.clear();
- CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
+ CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
Regs.clear();
- CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
- return CostFA.isLess(CostFB, TTI);
+ CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
+ return CostFA.isLess(CostFB);
};
bool Any = false;
@@ -4824,7 +4912,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
ReqRegs.insert(S);
SmallPtrSet<const SCEV *, 16> NewRegs;
- Cost NewCost;
+ Cost NewCost(L, SE, TTI);
for (const Formula &F : LU.Formulae) {
// Ignore formulae which may not be ideal in terms of register reuse of
// ReqRegs. The formula should use all required registers before
@@ -4848,8 +4936,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
- if (NewCost.isLess(SolutionCost, TTI)) {
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
+ if (NewCost.isLess(SolutionCost)) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
@@ -4858,9 +4946,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ".\n Regs:"; for (const SCEV *S
- : NewRegs) dbgs()
- << ' ' << *S;
+ dbgs() << ".\nRegs:\n";
+ for (const SCEV *S : NewRegs) dbgs()
+ << "- " << *S << "\n";
dbgs() << '\n');
SolutionCost = NewCost;
@@ -4875,9 +4963,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
- Cost SolutionCost;
+ Cost SolutionCost(L, SE, TTI);
SolutionCost.Lose();
- Cost CurCost;
+ Cost CurCost(L, SE, TTI);
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
@@ -5215,6 +5303,7 @@ void LSRInstance::RewriteForPHI(
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ bool needUpdateFixups = false;
BasicBlock *BB = PN->getIncomingBlock(i);
// If this is a critical edge, split the edge so that we do not insert
@@ -5233,7 +5322,7 @@ void LSRInstance::RewriteForPHI(
NewBB = SplitCriticalEdge(BB, Parent,
CriticalEdgeSplittingOptions(&DT, &LI)
.setMergeIdenticalEdges()
- .setDontDeleteUselessPHIs());
+ .setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
@@ -5253,6 +5342,8 @@ void LSRInstance::RewriteForPHI(
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
+
+ needUpdateFixups = true;
}
}
}
@@ -5277,6 +5368,44 @@ void LSRInstance::RewriteForPHI(
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
+
+ // If LSR splits a critical edge and the phi node has other pending
+ // fixup operands, we need to update those pending fixups. Otherwise
+ // formulae will not be implemented completely and some instructions
+ // will not be eliminated.
+ if (needUpdateFixups) {
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
+ for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
+ // If a fixup is supposed to rewrite some operand in the phi
+ // that was just updated, it may already have been moved to
+ // another phi node. Such a fixup requires an update.
+ if (Fixup.UserInst == PN) {
+ // Check if the operand we try to replace still exists in the
+ // original phi.
+ bool foundInOriginalPHI = false;
+ for (const auto &val : PN->incoming_values())
+ if (val == Fixup.OperandValToReplace) {
+ foundInOriginalPHI = true;
+ break;
+ }
+
+ // If the fixup operand was found in the original PHI, there is nothing to do.
+ if (foundInOriginalPHI)
+ continue;
+
+ // Otherwise it might have been moved to another PHI and requires an update.
+ // If the fixup operand is not found in any of the incoming blocks, that
+ // means we have already rewritten it, so there is nothing to do.
+ for (const auto &Block : PN->blocks())
+ for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
+ ++I) {
+ PHINode *NewPN = cast<PHINode>(I);
+ for (const auto &val : NewPN->incoming_values())
+ if (val == Fixup.OperandValToReplace)
+ Fixup.UserInst = NewPN;
+ }
+ }
+ }
}
}
@@ -5360,8 +5489,11 @@ void LSRInstance::ImplementSolution(
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
- const TargetTransformInfo &TTI)
- : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
+ const TargetTransformInfo &TTI, AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo)
+ : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), LibInfo(LibInfo), TTI(TTI), L(L),
+ FavorBackedgeIndex(EnableBackedgeIndexing &&
+ TTI.shouldFavorBackedgeIndex(L)) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -5556,6 +5688,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
@@ -5566,11 +5700,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo) {
+
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
+ Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, LibInfo).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -5601,14 +5738,17 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
- return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
+ auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo);
}
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
- AR.DT, AR.LI, AR.TTI))
+ AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index da46210b6fdd..86891eb451bb 100644
--- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -1,9 +1,8 @@
//===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -295,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, None, None, None, None, None, None);
+ L, SE, TTI, nullptr, nullptr, OptLevel,
+ None, None, None, None, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 38b80f48ed0e..2fa7436213dd 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1,9 +1,8 @@
//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,7 +23,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -56,6 +57,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
#include <cassert>
@@ -69,6 +71,12 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
+cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
+ "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
+ cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
+ " the current top-most loop. This is somtimes preferred to reduce"
+ " compile time."));
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The cost threshold for loop unrolling"));
@@ -166,7 +174,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
@@ -199,9 +208,12 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
TTI.getUnrollingPreferences(L, SE, UP);
// Apply size attributes
- if (L->getHeader()->getParent()->optForSize()) {
+ bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+ if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ UP.MaxPercentThresholdBoost = 100;
}
// Apply any user values specified by cl::opt
@@ -964,8 +976,10 @@ bool llvm::computeUnrollCount(
static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
- OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
- bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
+ OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ bool PreserveLCSSA, int OptLevel,
+ bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
Optional<bool> ProvidedAllowPeeling) {
@@ -986,15 +1000,19 @@ static LoopUnrollResult tryToUnrollLoop(
if (OnlyWhenForced && !(TM & TM_Enable))
return LoopUnrollResult::Unmodified;
+ bool OptForSize = L->getHeader()->getParent()->hasOptSize();
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling);
- // Exit early if unrolling is disabled.
- if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
+
+ // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
+ // as threshold later on.
+ if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
+ !OptForSize)
return LoopUnrollResult::Unmodified;
SmallPtrSet<const Value *, 32> EphValues;
@@ -1009,6 +1027,12 @@ static LoopUnrollResult tryToUnrollLoop(
<< " instructions.\n");
return LoopUnrollResult::Unmodified;
}
+
+ // When optimizing for size, use LoopSize as threshold, to (fully) unroll
+ // loops, if it does not increase code size.
+ if (OptForSize)
+ UP.Threshold = std::max(UP.Threshold, LoopSize);
+
if (NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
@@ -1081,8 +1105,10 @@ static LoopUnrollResult tryToUnrollLoop(
// Unroll the loop.
Loop *RemainderLoop = nullptr;
LoopUnrollResult UnrollResult = UnrollLoop(
- L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ L,
+ {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
+ UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ ForgetAllSCEV},
LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
@@ -1132,6 +1158,11 @@ public:
/// metadata are considered. All other loops are skipped.
bool OnlyWhenForced;
+ /// If false, when SCEV is invalidated, only forget everything in the
+ /// top-most loop (call forgetTopMostLoop) of the loop being processed.
+ /// Otherwise, call forgetAllLoops and rebuild when needed next.
+ bool ForgetAllSCEV;
+
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1140,15 +1171,16 @@ public:
Optional<bool> ProvidedAllowPeeling;
LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
- Optional<unsigned> Threshold = None,
+ bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
Optional<bool> AllowPeeling = None)
: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
- ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
- ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
- ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
+ ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
+ ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
+ ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
+ ProvidedAllowPeeling(AllowPeeling) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1171,9 +1203,10 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
- ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
- ProvidedUpperBound, ProvidedAllowPeeling);
+ L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
+ PreserveLCSSA, OptLevel, OnlyWhenForced,
+ ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
@@ -1203,14 +1236,14 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
- int Threshold, int Count, int AllowPartial,
- int Runtime, int UpperBound,
+ bool ForgetAllSCEV, int Threshold, int Count,
+ int AllowPartial, int Runtime, int UpperBound,
int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
return new LoopUnroll(
- OptLevel, OnlyWhenForced,
+ OptLevel, OnlyWhenForced, ForgetAllSCEV,
Threshold == -1 ? None : Optional<unsigned>(Threshold),
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
@@ -1219,8 +1252,10 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
- return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
+ bool ForgetAllSCEV) {
+ return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
+ 0, 0, 0, 0);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1250,8 +1285,9 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*BFI*/ nullptr, /*PSI*/ nullptr,
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
- /*Count*/ None,
+ ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1352,6 +1388,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
ProfileSummaryInfo *PSI =
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
bool Changed = false;
@@ -1361,7 +1399,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// will simplify all loops, regardless of whether anything end up being
// unrolled.
for (auto &L : LI) {
- Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
+ Changed |=
+ simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
}
@@ -1387,9 +1426,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// The API here is quite complex to call and we allow to select some
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
- &L, DT, &LI, SE, TTI, AC, ORE,
+ &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
- /*Count*/ None,
+ UnrollOpts.ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 4a089dfa7dbf..b5b8e720069c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1,9 +1,8 @@
//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -658,7 +657,7 @@ bool LoopUnswitch::processCurrentLoop() {
}
// Do not do non-trivial unswitch while optimizing for size.
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (OptimizeForSize ||
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
@@ -1405,8 +1404,8 @@ static void RemoveFromWorklist(Instruction *I,
/// When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
static void ReplaceUsesOfWith(Instruction *I, Value *V,
- std::vector<Instruction*> &Worklist,
- Loop *L, LPPassManager *LPM) {
+ std::vector<Instruction *> &Worklist, Loop *L,
+ LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
// Add uses to the worklist, which may be dead now.
@@ -1420,8 +1419,11 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
- if (!I->mayHaveSideEffects())
+ if (!I->mayHaveSideEffects()) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
+ }
++NumSimplify;
}
@@ -1548,8 +1550,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
ConstantInt::getTrue(Context), NewSISucc);
// Release the PHI operands for this edge.
for (PHINode &PN : NewSISucc->phis())
- PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
- UndefValue::get(PN.getType()));
+ PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
// Tell the domtree about the new block. We don't fully update the
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
@@ -1596,7 +1597,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// 'false'. TODO: update the domtree properly so we can pass it here.
if (Value *V = SimplifyInstruction(I, DL))
if (LI->replacementPreservesLCSSAForm(I, V)) {
- ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
continue;
}
@@ -1616,7 +1617,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
- ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM,
+ MSSAU.get());
// If Succ has any successors with PHI nodes, update them to have
// entries coming from Pred instead of Succ.
diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 83861b98fbd8..896dd8bcb922 100644
--- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -1,9 +1,8 @@
//===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -357,14 +356,22 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
/// 1) Check all load store in loop body are non atomic & non volatile.
/// 2) Check function call safety, by ensuring its not accessing memory.
/// 3) Loop body shouldn't have any may throw instruction.
+/// 4) Loop body shouldn't have any convergent or noduplicate instructions.
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
assert(I != nullptr && "Null instruction found!");
// Check function call safety
- if (auto *Call = dyn_cast<CallBase>(I))
+ if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (Call->isConvergent() || Call->cannotDuplicate()) {
+ LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
+ return false;
+ }
+
if (!AA->doesNotAccessMemory(Call)) {
LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
return false;
}
+ }
+
// Avoid loops with the possibility of throwing
if (I->mayThrow()) {
LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index c165c5ece95c..e076424d9042 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -1,9 +1,8 @@
//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
@@ -45,7 +44,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
Value *Res = nullptr;
switch (RMWI->getOperation()) {
@@ -87,6 +86,12 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Orig, Val);
break;
+ case AtomicRMWInst::FAdd:
+ Res = Builder.CreateFAdd(Orig, Val);
+ break;
+ case AtomicRMWInst::FSub:
+ Res = Builder.CreateFSub(Orig, Val);
+ break;
}
Builder.CreateStore(Res, Ptr);
RMWI->replaceAllUsesWith(Orig);
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 68bfa0030395..0d67c0d740ec 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -1,9 +1,8 @@
//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4867b33d671f..9489e01774d6 100644
--- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -1,9 +1,8 @@
//===- LowerGuardIntrinsic.cpp - Lower the guard intrinsic ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
new file mode 100644
index 000000000000..5342f2ddcb6b
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -0,0 +1,85 @@
+//===- LowerWidenableCondition.cpp - Lower widenable conditions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.widenable.condition intrinsic to its default
+// value, which is i1 true.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+
+using namespace llvm;
+
+namespace {
+struct LowerWidenableConditionLegacyPass : public FunctionPass {
+ static char ID;
+ LowerWidenableConditionLegacyPass() : FunctionPass(ID) {
+ initializeLowerWidenableConditionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+}
+
+static bool lowerWidenableCondition(Function &F) {
+  // Check whether we can cheaply rule out having any work to do at all.
+ auto *WCDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+ if (!WCDecl || WCDecl->use_empty())
+ return false;
+
+ using namespace llvm::PatternMatch;
+ SmallVector<CallInst *, 8> ToLower;
+ for (auto &I : instructions(F))
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ ToLower.push_back(cast<CallInst>(&I));
+
+ if (ToLower.empty())
+ return false;
+
+ for (auto *CI : ToLower) {
+ CI->replaceAllUsesWith(ConstantInt::getTrue(CI->getContext()));
+ CI->eraseFromParent();
+ }
+ return true;
+}
+
+bool LowerWidenableConditionLegacyPass::runOnFunction(Function &F) {
+ return lowerWidenableCondition(F);
+}
+
+char LowerWidenableConditionLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerWidenableConditionLegacyPass, "lower-widenable-condition",
+ "Lower the widenable condition to default true value", false,
+ false)
+
+Pass *llvm::createLowerWidenableConditionPass() {
+ return new LowerWidenableConditionLegacyPass();
+}
+
+PreservedAnalyses LowerWidenableConditionPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (lowerWidenableCondition(F))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
diff --git a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 1ba3994eba0e..789232e0f5ce 100644
--- a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -1,9 +1,8 @@
//===- MakeGuardsExplicit.cpp - Turn guard intrinsics into guard branches -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ced923d6973d..5a055139be4f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1,9 +1,8 @@
//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -279,8 +278,8 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
- [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
+ range_iterator I = partition_point(
+ Ranges, [=](const MemsetRange &O) { return O.End < Start; });
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
@@ -413,7 +412,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+ Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
@@ -750,7 +749,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
auto *V = SI->getOperand(0);
- if (Value *ByteVal = isBytewiseValue(V)) {
+ if (Value *ByteVal = isBytewiseValue(V, DL)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
@@ -1135,8 +1134,10 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
- MemsetLen, Align);
+ Builder.CreateMemSet(
+ Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
+ SrcSize),
+ MemSet->getOperand(1), MemsetLen, Align);
MD->removeInstruction(MemSet);
MemSet->eraseFromParent();
@@ -1228,7 +1229,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
// If copying from a constant, try to turn the memcpy into a memset.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
+ M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
M->getDestAlignment(), false);
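
The addRange hunk above replaces a hand-written lower_bound with llvm::partition_point; given that Ranges is kept sorted and non-overlapping, both return the first range whose End is not below the new Start. A self-contained sketch of the same idiom with std::partition_point (the struct and function names are illustrative only):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Range { int64_t Start, End; }; // stand-in for MemsetRange

    // First range whose End >= Start, i.e. the first candidate that could be
    // merged with a new range beginning at Start.
    std::vector<Range>::iterator firstMergeCandidate(std::vector<Range> &Ranges,
                                                     int64_t Start) {
      return std::partition_point(
          Ranges.begin(), Ranges.end(),
          [=](const Range &O) { return O.End < Start; });
    }
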
diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp
index 69fd8b163a07..3d047a193267 100644
--- a/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/lib/Transforms/Scalar/MergeICmps.cpp
@@ -1,9 +1,8 @@
//===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,29 +10,54 @@
// later typically inlined as a chain of efficient hardware comparisons). This
// typically benefits C++ member or non-member operator==().
//
-// The basic idea is to replace a larger chain of integer comparisons loaded
-// from contiguous memory locations into a smaller chain of such integer
+// The basic idea is to replace a longer chain of integer comparisons loaded
+// from contiguous memory locations with a shorter chain of larger integer
// comparisons. The benefits are twofold:
// - There are fewer jumps, and therefore fewer opportunities for mispredictions
// and I-cache misses.
// - Code size is smaller, both because jumps are removed and because the
// encoding of a 2*n byte compare is smaller than that of two n-byte
// compares.
-
+//
+// Example:
+//
+// struct S {
+// int a;
+// char b;
+// char c;
+// uint16_t d;
+// bool operator==(const S& o) const {
+// return a == o.a && b == o.b && c == o.c && d == o.d;
+// }
+// };
+//
+// is optimized as:
+//
+// bool S::operator==(const S& o) const {
+// return memcmp(this, &o, 8) == 0;
+// }
+//
+// which will later be expanded (by ExpandMemCmp) into a single 8-byte icmp.
+//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-#include <numeric>
-#include <utility>
-#include <vector>
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
using namespace llvm;
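
For the struct S example in the header comment above, the single memcmp(this, &o, 8) is valid because the four fields occupy eight contiguous bytes; the pass itself derives contiguity from GEP offsets in the IR, but the layout assumption behind the example can be sanity-checked in plain C++ (assuming a typical target where int is 4 bytes):

    #include <cstdint>

    // Mirrors the struct from the header comment.
    struct S {
      int a;      // offset 0, 4 bytes
      char b;     // offset 4
      char c;     // offset 5
      uint16_t d; // offset 6, 2 bytes
    };
    static_assert(sizeof(S) == 8, "fields are contiguous with no padding");
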
@@ -50,76 +74,109 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
return false;
}
-// A BCE atom.
+// A BCE atom ("Binary Compare Expression Atom") represents an integer load
+// at a constant offset from a base value, e.g. `a` or `o.c` in the example
+// at the top.
struct BCEAtom {
- BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
-
- const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
-
+ BCEAtom() = default;
+ BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
+ : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
+
+ BCEAtom(const BCEAtom &) = delete;
+ BCEAtom &operator=(const BCEAtom &) = delete;
+
+ BCEAtom(BCEAtom &&that) = default;
+ BCEAtom &operator=(BCEAtom &&that) {
+ if (this == &that)
+ return *this;
+ GEP = that.GEP;
+ LoadI = that.LoadI;
+ BaseId = that.BaseId;
+ Offset = std::move(that.Offset);
+ return *this;
+ }
+
+ // We want to order BCEAtoms by (Base, Offset). However we cannot use
+ // the pointer values for Base because these are non-deterministic.
+ // To make sure that the sort order is stable, we first assign to each atom
+ // base value an index based on its order of appearance in the chain of
+ // comparisons. We call this index `BaseOrdering`. For example, for:
+ // b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
+ // | block 1 | | block 2 | | block 3 |
+// b is assigned a lower index than a, because b appears as LHS in block 1,
+// which is before block 2.
+ // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
bool operator<(const BCEAtom &O) const {
- assert(Base() && "invalid atom");
- assert(O.Base() && "invalid atom");
- // Just ordering by (Base(), Offset) is sufficient. However because this
- // means that the ordering will depend on the addresses of the base
- // values, which are not reproducible from run to run. To guarantee
- // stability, we use the names of the values if they exist; we sort by:
- // (Base.getName(), Base(), Offset).
- const int NameCmp = Base()->getName().compare(O.Base()->getName());
- if (NameCmp == 0) {
- if (Base() == O.Base()) {
- return Offset.slt(O.Offset);
- }
- return Base() < O.Base();
- }
- return NameCmp < 0;
+ return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
}
- GetElementPtrInst *GEP;
- LoadInst *LoadI;
+ GetElementPtrInst *GEP = nullptr;
+ LoadInst *LoadI = nullptr;
+ unsigned BaseId = 0;
APInt Offset;
};
+// A class that assigns increasing ids to values in the order in which they are
+// seen. See the comment in `BCEAtom::operator<()`.
+class BaseIdentifier {
+public:
+ // Returns the id for value `Base`, after assigning one if `Base` has not been
+ // seen before.
+ int getBaseId(const Value *Base) {
+ assert(Base && "invalid base");
+ const auto Insertion = BaseToIndex.try_emplace(Base, Order);
+ if (Insertion.second)
+ ++Order;
+ return Insertion.first->second;
+ }
+
+private:
+ unsigned Order = 1;
+ DenseMap<const Value*, int> BaseToIndex;
+};
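
A self-contained sketch of the id scheme described in the ordering comment above, with std::map standing in for DenseMap; ids start at 1 because a BaseId of 0 is reserved to mean "invalid atom" (see BCECmpBlock::IsValid()):

    #include <cassert>
    #include <map>

    struct SimpleBaseIdentifier {
      unsigned Order = 1; // 0 is reserved for "invalid".
      std::map<const void *, int> BaseToIndex;
      int getBaseId(const void *Base) {
        auto Ins = BaseToIndex.try_emplace(Base, Order);
        if (Ins.second)
          ++Order;
        return Ins.first->second;
      }
    };

    int main() {
      int a, b, c, d; // stand-ins for the base values in the comment's example
      SimpleBaseIdentifier Ids;
      // The chain b[3]==c[2] && a[1]==d[1] && b[4]==c[3] visits bases b, c, a, d, b, c.
      assert(Ids.getBaseId(&b) == 1 && Ids.getBaseId(&c) == 2);
      assert(Ids.getBaseId(&a) == 3 && Ids.getBaseId(&d) == 4);
      assert(Ids.getBaseId(&b) == 1); // a repeated base keeps its first id
      return 0;
    }
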
+
// If this value is a load from a constant offset w.r.t. a base address, and
// there are no other users of the load or address, returns the base address and
// the offset.
-BCEAtom visitICmpLoadOperand(Value *const Val) {
- BCEAtom Result;
- if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
- LLVM_DEBUG(dbgs() << "load\n");
- if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- // Do not optimize atomic loads to non-atomic memcmp
- if (!LoadI->isSimple()) {
- LLVM_DEBUG(dbgs() << "volatile or atomic\n");
- return {};
- }
- Value *const Addr = LoadI->getOperand(0);
- if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
- LLVM_DEBUG(dbgs() << "GEP\n");
- if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- const auto &DL = GEP->getModule()->getDataLayout();
- if (!isDereferenceablePointer(GEP, DL)) {
- LLVM_DEBUG(dbgs() << "not dereferenceable\n");
- // We need to make sure that we can do comparison in any order, so we
- // require memory to be unconditionnally dereferencable.
- return {};
- }
- Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
- if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
- Result.GEP = GEP;
- Result.LoadI = LoadI;
- }
- }
+BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
+ auto *const LoadI = dyn_cast<LoadInst>(Val);
+ if (!LoadI)
+ return {};
+ LLVM_DEBUG(dbgs() << "load\n");
+ if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ // Do not optimize atomic loads to non-atomic memcmp
+ if (!LoadI->isSimple()) {
+ LLVM_DEBUG(dbgs() << "volatile or atomic\n");
+ return {};
}
- return Result;
+ Value *const Addr = LoadI->getOperand(0);
+ auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
+ if (!GEP)
+ return {};
+ LLVM_DEBUG(dbgs() << "GEP\n");
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ const auto &DL = GEP->getModule()->getDataLayout();
+ if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
+ LLVM_DEBUG(dbgs() << "not dereferenceable\n");
+  // We need to make sure that we can do the comparisons in any order, so we
+  // require memory to be unconditionally dereferenceable.
+ return {};
+ }
+ APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return {};
+ return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
+ Offset);
}
-// A basic block with a comparison between two BCE atoms.
+// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
+// example at the top.
// The block might do extra work besides the atom comparison, in which case
// doesOtherWork() returns true. Under some conditions, the block can be
// split into the atom comparison part and the "other work" part
@@ -133,13 +190,11 @@ class BCECmpBlock {
BCECmpBlock() {}
BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
- : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
+ : Lhs_(std::move(L)), Rhs_(std::move(R)), SizeBits_(SizeBits) {
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
}
- bool IsValid() const {
- return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
- }
+ bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
// Assert the block is consistent: If valid, it should also have
// non-null members besides Lhs_ and Rhs_.
@@ -160,19 +215,19 @@ class BCECmpBlock {
// Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
// instructions in the block.
- bool canSplit(AliasAnalysis *AA) const;
+ bool canSplit(AliasAnalysis &AA) const;
  // Return true if all the relevant instructions in the BCE-cmp-block can
// be sunk below this instruction. By doing this, we know we can separate the
// BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
// block.
bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
- AliasAnalysis *AA) const;
+ AliasAnalysis &AA) const;
// We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
// instructions. Split the old block and move all non-BCE-cmp-insts into the
// new parent block.
- void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
+ void split(BasicBlock *NewParent, AliasAnalysis &AA) const;
// The basic block where this comparison happens.
BasicBlock *BB = nullptr;
@@ -191,7 +246,7 @@ private:
bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
DenseSet<Instruction *> &BlockInsts,
- AliasAnalysis *AA) const {
+ AliasAnalysis &AA) const {
  // If this instruction has side effects and it's in the middle of the BCE cmp
  // block instructions, then bail for now.
if (Inst->mayHaveSideEffects()) {
@@ -201,9 +256,9 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// Disallow stores that might alias the BCE operands
MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
- if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
- isModSet(AA->getModRefInfo(Inst, RLoc)))
- return false;
+ if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
+ isModSet(AA.getModRefInfo(Inst, RLoc)))
+ return false;
}
// Make sure this instruction does not use any of the BCE cmp block
// instructions as operand.
@@ -214,7 +269,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
return true;
}
-void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
llvm::SmallVector<Instruction *, 4> OtherInsts;
@@ -234,7 +289,7 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
}
}
-bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
+bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
for (Instruction &Inst : *BB) {
@@ -265,7 +320,8 @@ bool BCECmpBlock::doesOtherWork() const {
// Visit the given comparison. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
- const ICmpInst::Predicate ExpectedPredicate) {
+ const ICmpInst::Predicate ExpectedPredicate,
+ BaseIdentifier &BaseId) {
// The comparison can only be used once:
// - For intermediate blocks, as a branch condition.
// - For the final block, as an incoming value for the Phi.
@@ -275,25 +331,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
return {};
}
- if (CmpI->getPredicate() == ExpectedPredicate) {
- LLVM_DEBUG(dbgs() << "cmp "
- << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
- << "\n");
- auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
- if (!Lhs.Base()) return {};
- auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
- if (!Rhs.Base()) return {};
- const auto &DL = CmpI->getModule()->getDataLayout();
- return BCECmpBlock(std::move(Lhs), std::move(Rhs),
- DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
- }
- return {};
+ if (CmpI->getPredicate() != ExpectedPredicate)
+ return {};
+ LLVM_DEBUG(dbgs() << "cmp "
+ << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
+ << "\n");
+ auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
+ if (!Lhs.BaseId)
+ return {};
+ auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
+ if (!Rhs.BaseId)
+ return {};
+ const auto &DL = CmpI->getModule()->getDataLayout();
+ return BCECmpBlock(std::move(Lhs), std::move(Rhs),
+ DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
// Visit the given comparison block. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
- const BasicBlock *const PhiBlock) {
+ const BasicBlock *const PhiBlock,
+ BaseIdentifier &BaseId) {
if (Block->empty()) return {};
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
if (!BranchI) return {};
@@ -306,7 +364,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
auto *const CmpI = dyn_cast<ICmpInst>(Val);
if (!CmpI) return {};
LLVM_DEBUG(dbgs() << "icmp\n");
- auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
+ auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@@ -323,7 +381,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
auto Result = visitICmp(
- CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
+ CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@@ -332,47 +391,41 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
}
static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
- BCECmpBlock &Comparison) {
+ BCECmpBlock &&Comparison) {
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
<< "': Found cmp of " << Comparison.SizeBits()
- << " bits between " << Comparison.Lhs().Base() << " + "
+ << " bits between " << Comparison.Lhs().BaseId << " + "
<< Comparison.Lhs().Offset << " and "
- << Comparison.Rhs().Base() << " + "
+ << Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
- Comparisons.push_back(Comparison);
+ Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis *AA);
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis &AA);
- int size() const { return Comparisons_.size(); }
+ int size() const { return Comparisons_.size(); }
#ifdef MERGEICMPS_DOT_ON
void dump() const;
#endif // MERGEICMPS_DOT_ON
- bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
+ bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU);
- private:
+private:
static bool IsContiguous(const BCECmpBlock &First,
const BCECmpBlock &Second) {
- return First.Lhs().Base() == Second.Lhs().Base() &&
- First.Rhs().Base() == Second.Rhs().Base() &&
+ return First.Lhs().BaseId == Second.Lhs().BaseId &&
+ First.Rhs().BaseId == Second.Rhs().BaseId &&
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
}
- // Merges the given comparison blocks into one memcmp block and update
- // branches. Comparisons are assumed to be continguous. If NextBBInChain is
- // null, the merged block will link to the phi block.
- void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
- BasicBlock *const NextBBInChain, PHINode &Phi,
- const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
-
PHINode &Phi_;
std::vector<BCECmpBlock> Comparisons_;
// The original entry block (before sorting);
@@ -380,16 +433,17 @@ class BCECmpChain {
};
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis *AA)
+ AliasAnalysis &AA)
: Phi_(Phi) {
assert(!Blocks.empty() && "a chain should have at least one block");
// Now look inside blocks to check for BCE comparisons.
std::vector<BCECmpBlock> Comparisons;
+ BaseIdentifier BaseId;
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
BasicBlock *const Block = Blocks[BlockIdx];
assert(Block && "invalid block");
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
- Block, Phi.getParent());
+ Block, Phi.getParent(), BaseId);
Comparison.BB = Block;
if (!Comparison.IsValid()) {
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
@@ -411,13 +465,13 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
// chain before sorting. Unless we can abort the chain at this point
// and start anew.
//
- // NOTE: we only handle block with single predecessor for now.
+    // NOTE: we only handle blocks with a single predecessor for now.
if (Comparison.canSplit(AA)) {
LLVM_DEBUG(dbgs()
<< "Split initial block '" << Comparison.BB->getName()
<< "' that does extra work besides compare\n");
Comparison.RequireSplit = true;
- enqueueBlock(Comparisons, Comparison);
+ enqueueBlock(Comparisons, std::move(Comparison));
} else {
LLVM_DEBUG(dbgs()
<< "ignoring initial block '" << Comparison.BB->getName()
@@ -450,7 +504,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
// We could still merge bb1 and bb2 though.
return;
}
- enqueueBlock(Comparisons, Comparison);
+ enqueueBlock(Comparisons, std::move(Comparison));
}
// It is possible we have no suitable comparison to merge.
@@ -466,9 +520,11 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
#endif // MERGEICMPS_DOT_ON
// Reorder blocks by LHS. We can do that without changing the
  // semantics because we are only accessing dereferenceable memory.
- llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
- return a.Lhs() < b.Lhs();
- });
+ llvm::sort(Comparisons_,
+ [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+ return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+ std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+ });
#ifdef MERGEICMPS_DOT_ON
errs() << "AFTER REORDERING:\n\n";
dump();
@@ -498,162 +554,205 @@ void BCECmpChain::dump() const {
}
#endif // MERGEICMPS_DOT_ON
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
- // First pass to check if there is at least one merge. If not, we don't do
- // anything and we keep analysis passes intact.
- {
- bool AtLeastOneMerged = false;
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
- AtLeastOneMerged = true;
- break;
+namespace {
+
+// A class to compute the name of a set of merged basic blocks.
+// This is optimized for the common case of no block names.
+class MergedBlockName {
+ // Storage for the uncommon case of several named blocks.
+ SmallString<16> Scratch;
+
+public:
+ explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
+ : Name(makeName(Comparisons)) {}
+ const StringRef Name;
+
+private:
+ StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
+ assert(!Comparisons.empty() && "no basic block");
+ // Fast path: only one block, or no names at all.
+ if (Comparisons.size() == 1)
+ return Comparisons[0].BB->getName();
+ const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
+ [](int i, const BCECmpBlock &Cmp) {
+ return i + Cmp.BB->getName().size();
+ });
+ if (size == 0)
+ return StringRef("", 0);
+
+ // Slow path: at least two blocks, at least one block with a name.
+ Scratch.clear();
+ // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
+ // separators.
+ Scratch.reserve(size + Comparisons.size() - 1);
+ const auto append = [this](StringRef str) {
+ Scratch.append(str.begin(), str.end());
+ };
+ append(Comparisons[0].BB->getName());
+ for (int I = 1, E = Comparisons.size(); I < E; ++I) {
+ const BasicBlock *const BB = Comparisons[I].BB;
+ if (!BB->getName().empty()) {
+ append("+");
+ append(BB->getName());
}
}
- if (!AtLeastOneMerged) return false;
+ return StringRef(Scratch);
}
+};
+} // namespace
+
+// Merges the given contiguous comparison blocks into one memcmp block.
+static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
+ BasicBlock *const InsertBefore,
+ BasicBlock *const NextCmpBlock,
+ PHINode &Phi, const TargetLibraryInfo &TLI,
+ AliasAnalysis &AA, DomTreeUpdater &DTU) {
+ assert(!Comparisons.empty() && "merging zero comparisons");
+ LLVMContext &Context = NextCmpBlock->getContext();
+ const BCECmpBlock &FirstCmp = Comparisons[0];
+
+ // Create a new cmp block before next cmp block.
+ BasicBlock *const BB =
+ BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
+ NextCmpBlock->getParent(), InsertBefore);
+ IRBuilder<> Builder(BB);
+ // Add the GEPs from the first BCECmpBlock.
+ Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+ Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+
+ Value *IsEqual = nullptr;
+ LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
+ << BB->getName() << "\n");
+ if (Comparisons.size() == 1) {
+ LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
+ Value *const LhsLoad =
+ Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
+ Value *const RhsLoad =
+ Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
+ // There are no blocks to merge, just do the comparison.
+ IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
+ } else {
+ // If there is one block that requires splitting, we do it now, i.e.
+ // just before we know we will collapse the chain. The instructions
+ // can be executed before any of the instructions in the chain.
+ const auto ToSplit =
+ std::find_if(Comparisons.begin(), Comparisons.end(),
+ [](const BCECmpBlock &B) { return B.RequireSplit; });
+ if (ToSplit != Comparisons.end()) {
+ LLVM_DEBUG(dbgs() << "Splitting non-BCE work to header\n");
+ ToSplit->split(BB, AA);
+ }
- // Remove phi references to comparison blocks, they will be rebuilt as we
- // merge the blocks.
- for (const auto &Comparison : Comparisons_) {
- Phi_.removeIncomingValue(Comparison.BB, false);
- }
+ const unsigned TotalSizeBits = std::accumulate(
+ Comparisons.begin(), Comparisons.end(), 0u,
+ [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
- // If entry block is part of the chain, we need to make the first block
- // of the chain the new entry block of the function.
- BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (Entry == Comparisons_[I].BB) {
- BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
- Entry->getParent(), Entry);
- BranchInst::Create(Entry, NEntryBB);
- break;
- }
+ // Create memcmp() == 0.
+ const auto &DL = Phi.getModule()->getDataLayout();
+ Value *const MemCmpCall = emitMemCmp(
+ Lhs, Rhs,
+ ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
+ DL, &TLI);
+ IsEqual = Builder.CreateICmpEQ(
+ MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
}
- // Point the predecessors of the chain to the first comparison block (which is
- // the new entry point) and update the entry block of the chain.
- if (EntryBlock_ != Comparisons_[0].BB) {
- EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
- EntryBlock_ = Comparisons_[0].BB;
+ BasicBlock *const PhiBB = Phi.getParent();
+ // Add a branch to the next basic block in the chain.
+ if (NextCmpBlock == PhiBB) {
+ // Continue to phi, passing it the comparison result.
+ Builder.CreateBr(PhiBB);
+ Phi.addIncoming(IsEqual, BB);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
+ } else {
+ // Continue to next block if equal, exit to phi else.
+ Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+ Phi.addIncoming(ConstantInt::getFalse(Context), BB);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
+ {DominatorTree::Insert, BB, PhiBB}});
}
+ return BB;
+}
+
+bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU) {
+ assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
+ // First pass to check if there is at least one merge. If not, we don't do
+ // anything and we keep analysis passes intact.
+ const auto AtLeastOneMerged = [this]() {
+ for (size_t I = 1; I < Comparisons_.size(); ++I) {
+ if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
+ return true;
+ }
+ return false;
+ };
+ if (!AtLeastOneMerged())
+ return false;
- // Effectively merge blocks.
+ LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
+ << EntryBlock_->getName() << "\n");
+
+ // Effectively merge blocks. We go in the reverse direction from the phi block
+ // so that the next block is always available to branch to.
+ const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
+ BasicBlock *InsertBefore,
+ BasicBlock *Next) {
+ return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
+ InsertBefore, Next, Phi_, TLI, AA, DTU);
+ };
int NumMerged = 1;
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
+ BasicBlock *NextCmpBlock = Phi_.getParent();
+ for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
+ if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
+ LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
+ << " into " << Comparisons_[I + 1].BB->getName()
+ << "\n");
++NumMerged;
} else {
- // Merge all previous comparisons and start a new merge block.
- mergeComparisons(
- makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
- Comparisons_[I].BB, Phi_, TLI, AA);
+ NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
NumMerged = 1;
}
}
- mergeComparisons(makeArrayRef(Comparisons_)
- .slice(Comparisons_.size() - NumMerged, NumMerged),
- nullptr, Phi_, TLI, AA);
-
- return true;
-}
-
-void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
- BasicBlock *const NextBBInChain,
- PHINode &Phi,
- const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
- assert(!Comparisons.empty());
- const auto &FirstComparison = *Comparisons.begin();
- BasicBlock *const BB = FirstComparison.BB;
- LLVMContext &Context = BB->getContext();
-
- if (Comparisons.size() >= 2) {
- // If there is one block that requires splitting, we do it now, i.e.
- // just before we know we will collapse the chain. The instructions
- // can be executed before any of the instructions in the chain.
- auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
- [](const BCECmpBlock &B) { return B.RequireSplit; });
- if (C != Comparisons.end())
- C->split(EntryBlock_, AA);
-
- LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
- const auto TotalSize =
- std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
- [](int Size, const BCECmpBlock &C) {
- return Size + C.SizeBits();
- }) /
- 8;
-
- // Incoming edges do not need to be updated, and both GEPs are already
- // computing the right address, we just need to:
- // - replace the two loads and the icmp with the memcmp
- // - update the branch
- // - update the incoming values in the phi.
- FirstComparison.BranchI->eraseFromParent();
- FirstComparison.CmpI->eraseFromParent();
- FirstComparison.Lhs().LoadI->eraseFromParent();
- FirstComparison.Rhs().LoadI->eraseFromParent();
-
- IRBuilder<> Builder(BB);
- const auto &DL = Phi.getModule()->getDataLayout();
- Value *const MemCmpCall = emitMemCmp(
- FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
- ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
- Builder, DL, TLI);
- Value *const MemCmpIsZero = Builder.CreateICmpEQ(
- MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
+ // Insert the entry block for the new chain before the old entry block.
+ // If the old entry block was the function entry, this ensures that the new
+ // entry can become the function entry.
+ NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
+
+ // Replace the original cmp chain with the new cmp chain by pointing all
+ // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
+ // blocks in the old chain unreachable.
+ while (!pred_empty(EntryBlock_)) {
+ BasicBlock* const Pred = *pred_begin(EntryBlock_);
+ LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
+ << "\n");
+ Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
+ DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
+ {DominatorTree::Insert, Pred, NextCmpBlock}});
+ }
- // Add a branch to the next basic block in the chain.
- if (NextBBInChain) {
- Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
- Phi.addIncoming(ConstantInt::getFalse(Context), BB);
- } else {
- Builder.CreateBr(Phi.getParent());
- Phi.addIncoming(MemCmpIsZero, BB);
- }
+ // If the old cmp chain was the function entry, we need to update the function
+ // entry.
+ const bool ChainEntryIsFnEntry =
+ (EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock());
+ if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
+ LLVM_DEBUG(dbgs() << "Changing function entry from "
+ << EntryBlock_->getName() << " to "
+ << NextCmpBlock->getName() << "\n");
+ DTU.getDomTree().setNewRoot(NextCmpBlock);
+ DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
+ }
+ EntryBlock_ = nullptr;
- // Delete merged blocks.
- for (size_t I = 1; I < Comparisons.size(); ++I) {
- BasicBlock *CBB = Comparisons[I].BB;
- CBB->replaceAllUsesWith(BB);
- CBB->eraseFromParent();
- }
- } else {
- assert(Comparisons.size() == 1);
- // There are no blocks to merge, but we still need to update the branches.
- LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
- if (NextBBInChain) {
- if (FirstComparison.BranchI->isConditional()) {
- LLVM_DEBUG(dbgs() << "conditional -> conditional\n");
- // Just update the "true" target, the "false" target should already be
- // the phi block.
- assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
- FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
- Phi.addIncoming(ConstantInt::getFalse(Context), BB);
- } else {
- LLVM_DEBUG(dbgs() << "unconditional -> conditional\n");
- // Replace the unconditional branch by a conditional one.
- FirstComparison.BranchI->eraseFromParent();
- IRBuilder<> Builder(BB);
- Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
- Phi.getParent());
- Phi.addIncoming(FirstComparison.CmpI, BB);
- }
- } else {
- if (FirstComparison.BranchI->isConditional()) {
- LLVM_DEBUG(dbgs() << "conditional -> unconditional\n");
- // Replace the conditional branch by an unconditional one.
- FirstComparison.BranchI->eraseFromParent();
- IRBuilder<> Builder(BB);
- Builder.CreateBr(Phi.getParent());
- Phi.addIncoming(FirstComparison.CmpI, BB);
- } else {
- LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n");
- Phi.addIncoming(FirstComparison.CmpI, BB);
- }
- }
+ // Delete merged blocks. This also removes incoming values in phi.
+ SmallVector<BasicBlock *, 16> DeadBlocks;
+ for (auto &Cmp : Comparisons_) {
+ LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
+ DeadBlocks.push_back(Cmp.BB);
}
+ DeleteDeadBlocks(DeadBlocks, &DTU);
+
+ Comparisons_.clear();
+ return true;
}
std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
@@ -691,8 +790,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
return Blocks;
}
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "processPhi()\n");
if (Phi.getNumIncomingValues() <= 1) {
LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -757,24 +856,54 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
return false;
}
- return CmpChain.simplify(TLI, AA);
+ return CmpChain.simplify(TLI, AA, DTU);
}
-class MergeICmps : public FunctionPass {
- public:
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, AliasAnalysis &AA,
+ DominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n");
+
+ // We only try merging comparisons if the target wants to expand memcmp later.
+ // The rationale is to avoid turning small chains into memcmp calls.
+ if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
+ return false;
+
+  // If we don't have memcmp available, we can't emit calls to it.
+ if (!TLI.has(LibFunc_memcmp))
+ return false;
+
+ DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr,
+ DomTreeUpdater::UpdateStrategy::Eager);
+
+ bool MadeChange = false;
+
+ for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+ // A Phi operation is always first in a basic block.
+ if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+ MadeChange |= processPhi(*Phi, TLI, AA, DTU);
+ }
+
+ return MadeChange;
+}
+
+class MergeICmpsLegacyPass : public FunctionPass {
+public:
static char ID;
- MergeICmps() : FunctionPass(ID) {
- initializeMergeICmpsPass(*PassRegistry::getPassRegistry());
+ MergeICmpsLegacyPass() : FunctionPass(ID) {
+ initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F)) return false;
const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto PA = runImpl(F, &TLI, &TTI, AA);
- return !PA.areAllPreserved();
+ // MergeICmps does not need the DominatorTree, but we update it if it's
+ // already available.
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
}
private:
@@ -782,46 +911,35 @@ class MergeICmps : public FunctionPass {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
-
- PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AliasAnalysis *AA);
};
-PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- AliasAnalysis *AA) {
- LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
-
- // We only try merging comparisons if the target wants to expand memcmp later.
- // The rationale is to avoid turning small chains into memcmp calls.
- if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
-
- // If we don't have memcmp avaiable we can't emit calls to it.
- if (!TLI->has(LibFunc_memcmp))
- return PreservedAnalyses::all();
-
- bool MadeChange = false;
-
- for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
- // A Phi operation is always first in a basic block.
- if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
- MadeChange |= processPhi(*Phi, TLI, AA);
- }
-
- if (MadeChange) return PreservedAnalyses::none();
- return PreservedAnalyses::all();
-}
+} // namespace
-} // namespace
-
-char MergeICmps::ID = 0;
-INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
+char MergeICmpsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
+INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
-Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
+Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); }
+
+PreservedAnalyses MergeICmpsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT);
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
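
With both the legacy wrapper and the new-PM MergeICmpsPass in place, the pass can be scheduled from either manager. Below is a minimal, hedged sketch of driving the new-PM version on one function using standard PassBuilder boilerplate; whether it actually transforms anything still depends on TTI::enableMemCmpExpansion and on memcmp being available in TLI:

    #include "llvm/IR/Function.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Scalar/MergeICmps.h"

    // Runs MergeICmpsPass on a single function using a standalone
    // FunctionAnalysisManager populated by PassBuilder.
    void runMergeICmps(llvm::Function &F) {
      llvm::PassBuilder PB;
      llvm::FunctionAnalysisManager FAM;
      PB.registerFunctionAnalyses(FAM);

      llvm::FunctionPassManager FPM;
      FPM.addPass(llvm::MergeICmpsPass());
      FPM.run(F, FAM);
    }
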
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index ee21feca8d2c..30645f4400e3 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -1,9 +1,8 @@
//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 7106ea216ad6..94436b55752a 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -1,9 +1,8 @@
//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -427,8 +426,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
RHS = Builder.CreateMul(
RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
}
- GetElementPtrInst *NewGEP =
- cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
+ Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
NewGEP->setIsInBounds(GEP->isInBounds());
NewGEP->takeName(GEP);
return NewGEP;
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 7cbb0fe70f82..08ac2b666fce 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1,9 +1,8 @@
//===- NewGVN.cpp - Global Value Numbering Pass ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1167,9 +1166,9 @@ const Expression *NewGVN::createExpression(Instruction *I) const {
SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
- } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
+ } else if (auto *CI = dyn_cast<CastInst>(I)) {
Value *V =
- SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
+ SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
@@ -1815,39 +1814,13 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
const Expression *
NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const {
if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
- auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
- unsigned Opcode = 0;
- // EI might be an extract from one of our recognised intrinsics. If it
- // is we'll synthesize a semantically equivalent expression instead on
- // an extract value expression.
- switch (II->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- Opcode = Instruction::Add;
- break;
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- Opcode = Instruction::Sub;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- Opcode = Instruction::Mul;
- break;
- default:
- break;
- }
-
- if (Opcode != 0) {
- // Intrinsic recognized. Grab its args to finish building the
- // expression.
- assert(II->getNumArgOperands() == 2 &&
- "Expect two args for recognised intrinsics.");
- return createBinaryExpression(Opcode, EI->getType(),
- II->getArgOperand(0),
- II->getArgOperand(1), I);
- }
- }
+ auto *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+ if (WO && EI->getNumIndices() == 1 && *EI->idx_begin() == 0)
+ // EI is an extract from one of our with.overflow intrinsics. Synthesize
+ // a semantically equivalent expression instead of an extract value
+ // expression.
+ return createBinaryExpression(WO->getBinaryOp(), EI->getType(),
+ WO->getLHS(), WO->getRHS(), I);
}
return createAggregateValueExpression(I);
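
The WithOverflowInst rewrite above folds the old opcode switch into getBinaryOp()/getLHS()/getRHS(); the reason an extract of index 0 can be value-numbered as a plain binary op is that the first field of a *.with.overflow result is exactly the wrapping arithmetic result, with the overflow bit in field 1. A small standalone illustration of that pair semantics using the clang/GCC overflow builtin (a compiler extension used here only as a model of the intrinsic):

    #include <cassert>
    #include <cstdint>

    int main() {
      int A = 2000000000, B = 1000000000, Sum;
      // Models llvm.sadd.with.overflow: Sum is field 0 (the wrapping add),
      // Overflow is field 1.
      bool Overflow = __builtin_add_overflow(A, B, &Sum);
      assert(Overflow);
      assert(static_cast<int64_t>(Sum) ==
             static_cast<int64_t>(A) + B - (1LL << 32));
      return 0;
    }
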
@@ -2011,12 +1984,14 @@ NewGVN::performSymbolicEvaluation(Value *V,
E = performSymbolicLoadEvaluation(I);
break;
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
E = createExpression(I);
break;
case Instruction::ICmp:
case Instruction::FCmp:
E = performSymbolicCmpEvaluation(I);
break;
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -2122,7 +2097,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
- else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
+ else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
PredicateToUsers[PAssume->Condition].insert(I);
}
@@ -2524,9 +2499,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
// For switches, propagate the case values into the case
// destinations.
- // Remember how many outgoing edges there are to every successor.
- SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
-
Value *SwitchCond = SI->getCondition();
Value *CondEvaluated = findConditionEquivalence(SwitchCond);
// See if we were able to turn this switch statement into a constant.
@@ -2547,7 +2519,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
} else {
for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
BasicBlock *TargetBlock = SI->getSuccessor(i);
- ++SwitchEdges[TargetBlock];
updateReachableEdge(B, TargetBlock);
}
}
@@ -3503,7 +3474,7 @@ bool NewGVN::runGVN() {
"BB containing ToErase deleted unexpectedly!");
ToErase->eraseFromParent();
}
- Changed |= !InstructionsToErase.empty();
+ Changed |= !InstructionsToErase.empty();
// Delete all unreachable blocks.
auto UnreachableBlockPred = [&](const BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 05ea9144f66c..039123218544 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -1,9 +1,8 @@
//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index fd2eb85fd7bf..b544f0a39ea8 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -1,9 +1,8 @@
//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,7 +55,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -179,19 +177,18 @@ struct PlaceSafepoints : public FunctionPass {
// caller's job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
- std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+ std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI);
-static bool needsStatepoint(const CallSite &CS, const TargetLibraryInfo &TLI) {
- if (callsGCLeafFunction(CS, TLI))
+static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) {
+ if (callsGCLeafFunction(Call, TLI))
return false;
- if (CS.isCall()) {
- CallInst *call = cast<CallInst>(CS.getInstruction());
- if (call->isInlineAsm())
+ if (auto *CI = dyn_cast<CallInst>(Call)) {
+ if (CI->isInlineAsm())
return false;
}
- return !(isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS));
+ return !(isStatepoint(Call) || isGCRelocate(Call) || isGCResult(Call));
}
/// Returns true if this loop is known to contain a call safepoint which
@@ -217,14 +214,14 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
BasicBlock *Current = Pred;
while (true) {
for (Instruction &I : *Current) {
- if (auto CS = CallSite(&I))
+ if (auto *Call = dyn_cast<CallBase>(&I))
        // Note: Technically, needing a safepoint isn't quite the right
        // condition here. We should instead be checking if the target method
        // has an unconditional poll. In practice, this is only a theoretical
        // concern since we don't have any methods with conditional-only
        // safepoint polls.
- if (needsStatepoint(CS, TLI))
+ if (needsStatepoint(Call, TLI))
return true;
}
@@ -360,9 +357,8 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
-static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
- Instruction *Inst = CS.getInstruction();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
switch (II->getIntrinsicID()) {
case Intrinsic::experimental_gc_statepoint:
case Intrinsic::experimental_patchpoint_void:
@@ -424,8 +420,8 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// which can grow the stack by an unbounded amount. This isn't required
// for GC semantics per se, but is a common requirement for languages
// which detect stack overflow via guard pages and then throw exceptions.
- if (auto CS = CallSite(Cursor)) {
- if (doesNotRequireEntrySafepointBefore(CS))
+ if (auto *Call = dyn_cast<CallBase>(Cursor)) {
+ if (doesNotRequireEntrySafepointBefore(Call))
continue;
break;
}
@@ -500,7 +496,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
DT.recalculate(F);
SmallVector<Instruction *, 16> PollsNeeded;
- std::vector<CallSite> ParsePointNeeded;
+ std::vector<CallBase *> ParsePointNeeded;
if (enableBackedgeSafepoints(F)) {
// Construct a pass manager to run the LoopPass backedge logic. We
@@ -589,7 +585,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// Now that we've identified all the needed safepoint poll locations, insert
// safepoint polls themselves.
for (Instruction *PollLocation : PollsNeeded) {
- std::vector<CallSite> RuntimeCalls;
+ std::vector<CallBase *> RuntimeCalls;
InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
RuntimeCalls.end());
@@ -622,7 +618,7 @@ INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
static void
InsertSafepointPoll(Instruction *InsertBefore,
- std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+ std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI) {
BasicBlock *OrigBB = InsertBefore->getParent();
Module *M = InsertBefore->getModule();
@@ -687,7 +683,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
// These are likely runtime calls. Should we assert that via calling
// convention or something?
- ParsePointsNeeded.push_back(CallSite(CI));
+ ParsePointsNeeded.push_back(CI);
}
assert(ParsePointsNeeded.size() <= Calls.size());
}
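
(Aside, not part of the patch: the hunks above are a mechanical CallSite to CallBase conversion. A minimal sketch of the new idiom, with an invented helper name, looks like this.)

```cpp
#include "llvm/IR/InstrTypes.h"   // CallBase
#include "llvm/IR/Instructions.h" // CallInst

using namespace llvm;

// Old: if (CallSite CS = CallSite(&I)) { ... CS.getInstruction() ... }
// New: the call instruction itself is the handle; no wrapper object needed.
static bool isPlainDirectCall(Instruction &I) {
  auto *Call = dyn_cast<CallBase>(&I); // covers call and invoke instructions
  if (!Call)
    return false;
  if (auto *CI = dyn_cast<CallInst>(Call))
    if (CI->isInlineAsm())
      return false;
  return Call->getCalledFunction() != nullptr; // direct callee known
}
```
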
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index cb893eab1654..fa8c9e2a5fe4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1,9 +1,8 @@
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -267,12 +266,16 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
+ assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
+ "Expected a Negate!");
+ // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
+ unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
Type *Ty = Neg->getType();
Constant *NegOne = Ty->isIntOrIntVectorTy() ?
ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
- BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
- Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
+ BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
+ Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
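
(Aside: the detail that matters in the LowerNegateToMultiply change is which operand holds the negated value. A hedged sketch, with an invented helper name:)

```cpp
#include <cassert>

#include "llvm/IR/InstrTypes.h" // UnaryOperator, BinaryOperator

using namespace llvm;

// "sub 0, X" and "fsub -0.0, X" keep the negated value in operand 1; the
// unary "fneg X" keeps it in operand 0.
static Value *getNegatedOperand(Instruction *Neg) {
  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
         "expected a negate");
  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
  return Neg->getOperand(OpNo);
}
```
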
@@ -445,8 +448,10 @@ using RepeatedValue = std::pair<Value*, APInt>;
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
-static bool LinearizeExprTree(BinaryOperator *I,
+static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops) {
+ assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
+ "Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
@@ -463,7 +468,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
- SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
+ SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
bool Changed = false;
@@ -490,10 +495,10 @@ static bool LinearizeExprTree(BinaryOperator *I,
SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
#endif
while (!Worklist.empty()) {
- std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
+ std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
I = P.first; // We examine the operands of this binary operator.
- for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
+ for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
@@ -573,14 +578,14 @@ static bool LinearizeExprTree(BinaryOperator *I,
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
- if ((Opcode == Instruction::Mul && match(BO, m_Neg(m_Value()))) ||
- (Opcode == Instruction::FMul && match(BO, m_FNeg(m_Value())))) {
+ if (Instruction *Tmp = dyn_cast<Instruction>(Op))
+ if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
+ (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
- BO = LowerNegateToMultiply(BO);
- LLVM_DEBUG(dbgs() << *BO << '\n');
- Worklist.push_back(std::make_pair(BO, Weight));
+ Tmp = LowerNegateToMultiply(Tmp);
+ LLVM_DEBUG(dbgs() << *Tmp << '\n');
+ Worklist.push_back(std::make_pair(Tmp, Weight));
Changed = true;
continue;
}
@@ -862,6 +867,8 @@ static Value *NegateValue(Value *V, Instruction *BI,
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
+ bool FoundCatchSwitch = false;
+
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
@@ -869,10 +876,30 @@ static Value *NegateValue(Value *V, Instruction *BI,
} else {
InsertPt = ++InstInput->getIterator();
}
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+
+ const BasicBlock *BB = InsertPt->getParent();
+
+ // Make sure we don't move anything before PHIs or exception
+ // handling pads.
+ while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) ||
+ InsertPt->isEHPad())) {
+ if (isa<CatchSwitchInst>(InsertPt))
+ // A catchswitch cannot have anything in the block except
+ // itself and PHIs. We'll bail out below.
+ FoundCatchSwitch = true;
+ ++InsertPt;
+ }
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
+
+ // We found a catchswitch in the block where we want to move the
+ // neg. We cannot move anything into that block. Bail and just
+ // create the neg before BI, as if we hadn't found an existing
+ // neg.
+ if (FoundCatchSwitch)
+ break;
+
TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
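
(Aside: the scan added above can be read on its own as the following sketch; the helper name is made up, and the caller is assumed to bail out when the flag comes back set.)

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h" // PHINode, CatchSwitchInst

using namespace llvm;

static BasicBlock::iterator findSafeInsertionPoint(BasicBlock &BB,
                                                   bool &FoundCatchSwitch) {
  FoundCatchSwitch = false;
  BasicBlock::iterator It = BB.begin();
  // Nothing may be placed before PHIs or exception handling pads.
  while (It != BB.end() && (isa<PHINode>(It) || It->isEHPad())) {
    if (isa<CatchSwitchInst>(It))
      FoundCatchSwitch = true; // block holds only the catchswitch and PHIs
    ++It;
  }
  return It;
}
```
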
@@ -1329,8 +1356,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
// than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
// "z" in the order of X-Y-Z is better than any other orders.
- std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
- [](XorOpnd *LHS, XorOpnd *RHS) {
+ llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
return LHS->getSymbolicRank() < RHS->getSymbolicRank();
});
@@ -1687,8 +1713,7 @@ static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
// below our minimum of '4'.
assert(FactorPowerSum >= 4);
- std::stable_sort(Factors.begin(), Factors.end(),
- [](const Factor &LHS, const Factor &RHS) {
+ llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
return LHS.Power > RHS.Power;
});
return true;
@@ -1801,7 +1826,7 @@ Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
return V;
ValueEntry NewEntry = ValueEntry(getRank(V), V);
- Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
+ Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
return nullptr;
}
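
(Aside: several hunks in this file only swap std::stable_sort and std::lower_bound over begin()/end() pairs for the range-based helpers from llvm/ADT/STLExtras.h. A tiny usage sketch with arbitrary element types:)

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static void sortedInsert(llvm::SmallVectorImpl<int> &Vals, int NewVal) {
  llvm::stable_sort(Vals);                   // sorts the whole container
  auto It = llvm::lower_bound(Vals, NewVal); // first element not less than NewVal
  Vals.insert(It, NewVal);
}
```
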
@@ -2001,7 +2026,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
- if (!isa<BinaryOperator>(I))
+ if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
return;
if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
@@ -2066,7 +2091,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
I = NI;
}
}
- } else if (I->getOpcode() == Instruction::FSub) {
+ } else if (I->getOpcode() == Instruction::FNeg ||
+ I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
@@ -2075,7 +2101,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
} else if (match(I, m_FNeg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
- if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
+ Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
+ I->getOperand(0);
+ if (isReassociableOp(Op, Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
// If the negate was simplified, revisit the users to see if we can
@@ -2142,7 +2170,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// positions maintained (and so the compiler is deterministic). Note that
// this sorts so that the highest ranking values end up at the beginning of
// the vector.
- std::stable_sort(Ops.begin(), Ops.end());
+ llvm::stable_sort(Ops);
// Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
@@ -2218,8 +2246,15 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
if (std::less<Value *>()(Op1, Op0))
std::swap(Op0, Op1);
auto it = PairMap[Idx].find({Op0, Op1});
- if (it != PairMap[Idx].end())
- Score += it->second;
+ if (it != PairMap[Idx].end()) {
+ // Functions like BreakUpSubtract() can erase the Values we're using
+ // as keys and create new Values after we built the PairMap. There's a
+ // small chance that the new nodes can have the same address as
+ // something already in the table. We shouldn't accumulate the stored
+ // score in that case as it refers to the wrong Value.
+ if (it->second.isValid())
+ Score += it->second.Score;
+ }
unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
if (Score > Max || (Score == Max && MaxRank < BestRank)) {
@@ -2288,9 +2323,15 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
std::swap(Op0, Op1);
if (!Visited.insert({Op0, Op1}).second)
continue;
- auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1});
- if (!res.second)
- ++res.first->second;
+ auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
+ if (!res.second) {
+ // If either key value has been erased then we've got the same
+ // address by coincidence. That can't happen here because nothing is
+ // erasing values but it can happen by the time we're querying the
+ // map.
+ assert(res.first->second.isValid() && "WeakVH invalidated");
+ ++res.first->second.Score;
+ }
}
}
}
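
(Aside: the PairMap hunks use a value type whose definition is outside this diff. A plausible shape, inferred from the uses above; the field and method names are assumptions:)

```cpp
#include "llvm/IR/ValueHandle.h" // WeakVH

namespace {
// WeakVH handles are nulled automatically when the tracked Value is erased,
// so isValid() catches stale entries whose keys merely reuse an old address.
struct PairMapValue {
  llvm::WeakVH Value1; // tracks Op0
  llvm::WeakVH Value2; // tracks Op1
  unsigned Score = 0;
  bool isValid() const { return Value1 && Value2; }
};
} // namespace
```
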
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 018feb035a4f..3296322e00d5 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -1,9 +1,8 @@
//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 42d7ed5bc534..c358258d24cf 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1,9 +1,8 @@
//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,18 +25,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -286,9 +284,9 @@ struct PartiallyConstructedSafepointRecord {
} // end anonymous namespace
-static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
Optional<OperandBundleUse> DeoptBundle =
- CS.getOperandBundle(LLVMContext::OB_deopt);
+ Call->getOperandBundle(LLVMContext::OB_deopt);
if (!DeoptBundle.hasValue()) {
assert(AllowStatepointWithNoDeoptInfo &&
@@ -370,14 +368,11 @@ static std::string suffixed_name_or(Value *V, StringRef Suffix,
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
-static void
-analyzeParsePointLiveness(DominatorTree &DT,
- GCPtrLivenessData &OriginalLivenessData, CallSite CS,
- PartiallyConstructedSafepointRecord &Result) {
- Instruction *Inst = CS.getInstruction();
-
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
+ PartiallyConstructedSafepointRecord &Result) {
StatepointLiveSetTy LiveSet;
- findLiveSetAtInst(Inst, OriginalLivenessData, LiveSet);
+ findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
dbgs() << "Live Variables:\n";
@@ -385,7 +380,7 @@ analyzeParsePointLiveness(DominatorTree &DT,
dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
- dbgs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+ dbgs() << "Safepoint For: " << Call->getCalledValue()->getName() << "\n";
dbgs() << "Number live values: " << LiveSet.size() << "\n";
}
Result.LiveSet = LiveSet;
@@ -1178,7 +1173,7 @@ findBasePointers(const StatepointLiveSetTy &live,
/// Find the required based pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &result) {
MapVector<Value *, Value *> PointerToBase;
findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
@@ -1200,11 +1195,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
- Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
@@ -1307,7 +1302,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
// Lazily populated map from input types to the canonicalized form mentioned
// in the comment above. This should probably be cached somewhere more
// broadly.
- DenseMap<Type*, Value*> TypeToDeclMap;
+ DenseMap<Type *, Function *> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1318,7 +1313,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Type *Ty = LiveVariables[i]->getType();
if (!TypeToDeclMap.count(Ty))
TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
- Value *GCRelocateDecl = TypeToDeclMap[Ty];
+ Function *GCRelocateDecl = TypeToDeclMap[Ty];
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
@@ -1399,16 +1394,16 @@ public:
} // end anonymous namespace
-static StringRef getDeoptLowering(CallSite CS) {
+static StringRef getDeoptLowering(CallBase *Call) {
const char *DeoptLowering = "deopt-lowering";
- if (CS.hasFnAttr(DeoptLowering)) {
- // FIXME: CallSite has a *really* confusing interface around attributes
+ if (Call->hasFnAttr(DeoptLowering)) {
+ // FIXME: Calls have a *really* confusing interface around attributes
// with values.
- const AttributeList &CSAS = CS.getAttributes();
+ const AttributeList &CSAS = Call->getAttributes();
if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
.getValueAsString();
- Function *F = CS.getCalledFunction();
+ Function *F = Call->getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
}
@@ -1416,7 +1411,7 @@ static StringRef getDeoptLowering(CallSite CS) {
}
static void
-makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
@@ -1427,19 +1422,18 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// immediately before the previous instruction under the assumption that all
// arguments will be available here. We can't insert afterwards since we may
// be replacing a terminator.
- Instruction *InsertBefore = CS.getInstruction();
- IRBuilder<> Builder(InsertBefore);
+ IRBuilder<> Builder(Call);
ArrayRef<Value *> GCArgs(LiveVariables);
uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
uint32_t NumPatchBytes = 0;
uint32_t Flags = uint32_t(StatepointFlags::None);
- ArrayRef<Use> CallArgs(CS.arg_begin(), CS.arg_end());
- ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(CS);
+ ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end());
+ ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(Call);
ArrayRef<Use> TransitionArgs;
if (auto TransitionBundle =
- CS.getOperandBundle(LLVMContext::OB_gc_transition)) {
+ Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
Flags |= uint32_t(StatepointFlags::GCTransition);
TransitionArgs = TransitionBundle->Inputs;
}
@@ -1450,21 +1444,21 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
bool IsDeoptimize = false;
StatepointDirectives SD =
- parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ parseStatepointDirectivesFromAttrs(Call->getAttributes());
if (SD.NumPatchBytes)
NumPatchBytes = *SD.NumPatchBytes;
if (SD.StatepointID)
StatepointID = *SD.StatepointID;
// Pass through the requested lowering if any. The default is live-through.
- StringRef DeoptLowering = getDeoptLowering(CS);
+ StringRef DeoptLowering = getDeoptLowering(Call);
if (DeoptLowering.equals("live-in"))
Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
else {
assert(DeoptLowering.equals("live-through") && "Unsupported value!");
}
- Value *CallTarget = CS.getCalledValue();
+ Value *CallTarget = Call->getCalledValue();
if (Function *F = dyn_cast<Function>(CallTarget)) {
if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1481,8 +1475,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// calls to @llvm.experimental.deoptimize with different argument types in
// the same module. This is fine -- we assume the frontend knew what it
// was doing when generating this kind of IR.
- CallTarget =
- F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
+ CallTarget = F->getParent()
+ ->getOrInsertFunction("__llvm_deoptimize", FTy)
+ .getCallee();
IsDeoptimize = true;
}
@@ -1490,57 +1485,56 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Create the statepoint given all the arguments
Instruction *Token = nullptr;
- if (CS.isCall()) {
- CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
- CallInst *Call = Builder.CreateGCStatepointCall(
+ if (auto *CI = dyn_cast<CallInst>(Call)) {
+ CallInst *SPCall = Builder.CreateGCStatepointCall(
StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
- Call->setTailCallKind(ToReplace->getTailCallKind());
- Call->setCallingConv(ToReplace->getCallingConv());
+ SPCall->setTailCallKind(CI->getTailCallKind());
+ SPCall->setCallingConv(CI->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes. In case we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+ SPCall->setAttributes(legalizeCallAttributes(CI->getAttributes()));
- Token = Call;
+ Token = SPCall;
// Put the following gc_result and gc_relocate calls immediately after the
// the old call (which we're about to delete)
- assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
- Builder.SetInsertPoint(ToReplace->getNextNode());
- Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
+ assert(CI->getNextNode() && "Not a terminator, must have next!");
+ Builder.SetInsertPoint(CI->getNextNode());
+ Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
} else {
- InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
+ auto *II = cast<InvokeInst>(Call);
// Insert the new invoke into the old block. We'll remove the old one in a
// moment at which point this will become the new terminator for the
// original block.
- InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
- ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
- GCArgs, "statepoint_token");
+ InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
+ StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
+ II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
+ "statepoint_token");
- Invoke->setCallingConv(ToReplace->getCallingConv());
+ SPInvoke->setCallingConv(II->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes. In case we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+ SPInvoke->setAttributes(legalizeCallAttributes(II->getAttributes()));
- Token = Invoke;
+ Token = SPInvoke;
// Generate gc relocates in exceptional path
- BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+ BasicBlock *UnwindBlock = II->getUnwindDest();
assert(!isa<PHINode>(UnwindBlock->begin()) &&
UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
- Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+ Builder.SetCurrentDebugLocation(II->getDebugLoc());
// Attach exceptional gc relocates to the landingpad.
Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
@@ -1551,7 +1545,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *NormalDest = ToReplace->getNormalDest();
+ BasicBlock *NormalDest = II->getNormalDest();
assert(!isa<PHINode>(NormalDest->begin()) &&
NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
@@ -1568,16 +1562,15 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// transform the tail-call like structure to a call to a void function
// followed by unreachable to get better codegen.
Replacements.push_back(
- DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
+ DeferredReplacement::createDeoptimizeReplacement(Call));
} else {
Token->setName("statepoint_token");
- if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
- StringRef Name =
- CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
- CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+ if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
+ StringRef Name = Call->hasName() ? Call->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
GCResult->setAttributes(
AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
- CS.getAttributes().getRetAttributes()));
+ Call->getAttributes().getRetAttributes()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1586,10 +1579,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// after the live sets have been made explicit in the IR, and we no longer
// have raw pointers to worry about.
Replacements.emplace_back(
- DeferredReplacement::createRAUW(CS.getInstruction(), GCResult));
+ DeferredReplacement::createRAUW(Call, GCResult));
} else {
- Replacements.emplace_back(
- DeferredReplacement::createDelete(CS.getInstruction()));
+ Replacements.emplace_back(DeferredReplacement::createDelete(Call));
}
}
@@ -1606,7 +1598,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the callers responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, CallSite CS,
+makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements) {
const auto &LiveSet = Result.LiveSet;
@@ -1625,7 +1617,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
assert(LiveVec.size() == BaseVec.size());
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
+ makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for the relocationViaAlloca.
@@ -1636,7 +1628,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
// for sanity checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
- DenseMap<Value *, Value *> &AllocaMap,
+ DenseMap<Value *, AllocaInst *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (User *U : GCRelocs) {
GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
@@ -1671,7 +1663,7 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
// "insertRelocationStores" but works for rematerialized values.
static void insertRematerializationStores(
const RematerializedValueMapTy &RematerializedValues,
- DenseMap<Value *, Value *> &AllocaMap,
+ DenseMap<Value *, AllocaInst *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (auto RematerializedValuePair: RematerializedValues) {
Instruction *RematerializedValue = RematerializedValuePair.first;
@@ -1704,7 +1696,7 @@ static void relocationViaAlloca(
#endif
// TODO-PERF: change data structures, reserve
- DenseMap<Value *, Value *> AllocaMap;
+ DenseMap<Value *, AllocaInst *> AllocaMap;
SmallVector<AllocaInst *, 200> PromotableAllocas;
// Used later to check that we have enough allocas to store all values
std::size_t NumRematerializedValues = 0;
@@ -1774,7 +1766,7 @@ static void relocationViaAlloca(
SmallVector<AllocaInst *, 64> ToClobber;
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
- AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+ AllocaInst *Alloca = Pair.second;
// This value was relocated
if (VisitedLiveValues.count(Def)) {
@@ -1806,7 +1798,7 @@ static void relocationViaAlloca(
// Update use with load allocas and add store for gc_relocated.
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
- Value *Alloca = Pair.second;
+ AllocaInst *Alloca = Pair.second;
// We pre-record the uses of allocas so that we don't have to worry about
// later updates that change the user information.
@@ -1834,13 +1826,15 @@ static void relocationViaAlloca(
PHINode *Phi = cast<PHINode>(Use);
for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
if (Def == Phi->getIncomingValue(i)) {
- LoadInst *Load = new LoadInst(
- Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ LoadInst *Load =
+ new LoadInst(Alloca->getAllocatedType(), Alloca, "",
+ Phi->getIncomingBlock(i)->getTerminator());
Phi->setIncomingValue(i, Load);
}
}
} else {
- LoadInst *Load = new LoadInst(Alloca, "", Use);
+ LoadInst *Load =
+ new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
Use->replaceUsesOfWith(Def, Load);
}
}
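
(Aside: the load rewrites above follow the general move toward spelling out the loaded type instead of deriving it from the pointer's pointee type. A minimal sketch of the new constructor form, with an invented helper name:)

```cpp
#include "llvm/IR/Instructions.h" // AllocaInst, LoadInst

using namespace llvm;

static LoadInst *reloadFromAlloca(AllocaInst *Alloca,
                                  Instruction *InsertBefore) {
  // Old form: new LoadInst(Alloca, "", InsertBefore);
  // New form: the result type is passed explicitly.
  return new LoadInst(Alloca->getAllocatedType(), Alloca, "reload",
                      InsertBefore);
}
```
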
@@ -1893,25 +1887,25 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
-static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
+static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
SmallVectorImpl<CallInst *> &Holders) {
if (Values.empty())
// No values to hold live, might as well not insert the empty holder
return;
- Module *M = CS.getInstruction()->getModule();
+ Module *M = Call->getModule();
// Use a dummy vararg function to actually hold the values live
- Function *Func = cast<Function>(M->getOrInsertFunction(
- "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
- if (CS.isCall()) {
+ FunctionCallee Func = M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
+ if (isa<CallInst>(Call)) {
// For call safepoints insert dummy calls right after safepoint
- Holders.push_back(CallInst::Create(Func, Values, "",
- &*++CS.getInstruction()->getIterator()));
+ Holders.push_back(
+ CallInst::Create(Func, Values, "", &*++Call->getIterator()));
return;
}
// For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
- auto *II = cast<InvokeInst>(CS.getInstruction());
+ auto *II = cast<InvokeInst>(Call);
Holders.push_back(CallInst::Create(
Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
@@ -1919,7 +1913,7 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
}
static void findLiveReferences(
- Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
GCPtrLivenessData OriginalLivenessData;
computeLiveInValues(DT, F, OriginalLivenessData);
@@ -2022,7 +2016,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
// to relocate. Remove these values from the live set, rematerialize them after
// statepoint and record them in "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallSite CS,
+static void rematerializeLiveValues(CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
@@ -2076,7 +2070,7 @@ static void rematerializeLiveValues(CallSite CS,
// For invokes we need to rematerialize each chain twice - for normal and
// for unwind basic blocks. Model this by multiplying cost by two.
- if (CS.isInvoke()) {
+ if (isa<InvokeInst>(Call)) {
Cost *= 2;
}
// If it's too expensive - skip it
@@ -2144,14 +2138,14 @@ static void rematerializeLiveValues(CallSite CS,
// Different cases for calls and invokes. For invokes we need to clone
// instructions both on normal and unwind path.
- if (CS.isCall()) {
- Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+ if (isa<CallInst>(Call)) {
+ Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
- InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+ auto *Invoke = cast<InvokeInst>(Call);
Instruction *NormalInsertBefore =
&*Invoke->getNormalDest()->getFirstInsertionPt();
@@ -2176,25 +2170,25 @@ static void rematerializeLiveValues(CallSite CS,
static bool insertParsePoints(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
- SmallVectorImpl<CallSite> &ToUpdate) {
+ SmallVectorImpl<CallBase *> &ToUpdate) {
#ifndef NDEBUG
// sanity check the input
- std::set<CallSite> Uniqued;
+ std::set<CallBase *> Uniqued;
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (CallSite CS : ToUpdate)
- assert(CS.getInstruction()->getFunction() == &F);
+ for (CallBase *Call : ToUpdate)
+ assert(Call->getFunction() == &F);
#endif
// When inserting gc.relocates for invokes, we need to be able to insert at
// the top of the successor blocks. See the comment on
// normalForInvokeSafepoint on exactly what is needed. Note that this step
// may restructure the CFG.
- for (CallSite CS : ToUpdate) {
- if (!CS.isInvoke())
+ for (CallBase *Call : ToUpdate) {
+ auto *II = dyn_cast<InvokeInst>(Call);
+ if (!II)
continue;
- auto *II = cast<InvokeInst>(CS.getInstruction());
normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
}
@@ -2207,17 +2201,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// actual safepoint insertion as arguments. This ensures reference operands
// in the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
- for (CallSite CS : ToUpdate) {
+ for (CallBase *Call : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- for (Value *Arg : GetDeoptBundleOperands(CS)) {
+ for (Value *Arg : GetDeoptBundleOperands(Call)) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
- insertUseHolderAfter(CS, DeoptValues, Holders);
+ insertUseHolderAfter(Call, DeoptValues, Holders);
}
SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
@@ -2319,7 +2313,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
for (size_t i = 0; i < Records.size(); i++)
makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
- ToUpdate.clear(); // prevent accident use of invalid CallSites
+  ToUpdate.clear(); // prevent accidental use of invalid calls.
for (auto &PR : Replacements)
PR.doReplacement();
@@ -2384,7 +2378,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
return !Records.empty();
}
-// Handles both return values and arguments for Functions and CallSites.
+// Handles both return values and arguments for Functions and calls.
template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
unsigned Index) {
@@ -2476,12 +2470,13 @@ static void stripNonValidDataFromBody(Function &F) {
stripInvalidMetadataFromInstruction(I);
- if (CallSite CS = CallSite(&I)) {
- for (int i = 0, e = CS.arg_size(); i != e; i++)
- if (isa<PointerType>(CS.getArgument(i)->getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex);
- if (isa<PointerType>(CS.getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ for (int i = 0, e = Call->arg_size(); i != e; i++)
+ if (isa<PointerType>(Call->getArgOperand(i)->getType()))
+ RemoveNonValidAttrAtIndex(Ctx, *Call,
+ i + AttributeList::FirstArgIndex);
+ if (isa<PointerType>(Call->getType()))
+ RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex);
}
}
@@ -2526,12 +2521,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
auto NeedsRewrite = [&TLI](Instruction &I) {
- if (ImmutableCallSite CS = ImmutableCallSite(&I))
- return !callsGCLeafFunction(CS, TLI) && !isStatepoint(CS);
+ if (const auto *Call = dyn_cast<CallBase>(&I))
+ return !callsGCLeafFunction(Call, TLI) && !isStatepoint(Call);
return false;
};
-
// Delete any unreachable statepoints so that we don't have unrewritten
// statepoints surviving this pass. This makes testing easier and the
// resulting IR less confusing to human readers.
@@ -2543,7 +2537,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// Gather all the statepoints which need rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
// when rewriting. We'll delete the unreachable ones in a moment.
- SmallVector<CallSite, 64> ParsePointNeeded;
+ SmallVector<CallBase *, 64> ParsePointNeeded;
for (Instruction &I : instructions(F)) {
// TODO: only the ones with the flag set!
if (NeedsRewrite(I)) {
@@ -2553,7 +2547,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// isReachableFromEntry() returns true.
assert(DT.isReachableFromEntry(I.getParent()) &&
"no unreachable blocks expected");
- ParsePointNeeded.push_back(CallSite(&I));
+ ParsePointNeeded.push_back(cast<CallBase>(&I));
}
}
@@ -2602,6 +2596,33 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
}
}
+ // Nasty workaround - The base computation code in the main algorithm doesn't
+ // consider the fact that a GEP can be used to convert a scalar to a vector.
+ // The right fix for this is to integrate GEPs into the base rewriting
+  // algorithm properly; this is just a short-term workaround to prevent
+ // crashes by canonicalizing such GEPs into fully vector GEPs.
+ for (Instruction &I : instructions(F)) {
+ if (!isa<GetElementPtrInst>(I))
+ continue;
+
+ unsigned VF = 0;
+ for (unsigned i = 0; i < I.getNumOperands(); i++)
+ if (I.getOperand(i)->getType()->isVectorTy()) {
+ assert(VF == 0 ||
+ VF == I.getOperand(i)->getType()->getVectorNumElements());
+ VF = I.getOperand(i)->getType()->getVectorNumElements();
+ }
+
+ // It's the vector to scalar traversal through the pointer operand which
+ // confuses base pointer rewriting, so limit ourselves to that case.
+ if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
+ IRBuilder<> B(&I);
+ auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
+ I.setOperand(0, Splat);
+ MadeChange = true;
+ }
+ }
+
MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
return MadeChange;
}
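
(Aside: a compact restatement of the workaround added above, assuming the caller has already computed the vector width VF of the GEP's vector operands; the helper name is made up.)

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h" // GetElementPtrInst

using namespace llvm;

static bool splatScalarGEPBase(GetElementPtrInst &GEP, unsigned VF) {
  // Only the scalar-pointer / vector-index mix needs canonicalizing.
  if (VF == 0 || GEP.getOperand(0)->getType()->isVectorTy())
    return false;
  IRBuilder<> B(&GEP);
  Value *Splat = B.CreateVectorSplat(VF, GEP.getOperand(0));
  GEP.setOperand(0, Splat); // every operand is now a <VF x ...> vector
  return true;
}
```
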
@@ -2786,11 +2807,10 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
}
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &Info) {
- Instruction *Inst = CS.getInstruction();
StatepointLiveSetTy Updated;
- findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+ findLiveSetAtInst(Call, RevisedLivenessData, Updated);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
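
(Aside: the getOrInsertFunction updates in this file reflect its new FunctionCallee return type, which carries the callee together with its function type; callers pass it straight to CreateCall / CallInst::Create or unwrap it via getCallee(). A small sketch with invented names:)

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h" // FunctionType, FunctionCallee
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitVoidHelperCall(Module &M, IRBuilder<> &B,
                                    StringRef Name) {
  FunctionCallee Callee = M.getOrInsertFunction(
      Name, FunctionType::get(Type::getVoidTy(M.getContext()),
                              /*isVarArg=*/false));
  return B.CreateCall(Callee); // FunctionCallee is accepted directly
}
```
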
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 2f6ed05c023b..4093e50ce899 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1,9 +1,8 @@
//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -210,11 +210,11 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// TrackedRetVals - If we are tracking arguments into and the return
/// value out of a function, it will have an entry in this map, indicating
/// what the known return value for the function is.
- DenseMap<Function *, LatticeVal> TrackedRetVals;
+ MapVector<Function *, LatticeVal> TrackedRetVals;
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
- DenseMap<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
+ MapVector<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
/// represented here for efficient lookup.
@@ -372,7 +372,7 @@ public:
}
/// getTrackedRetVals - Get the inferred return value map.
- const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ const MapVector<Function*, LatticeVal> &getTrackedRetVals() {
return TrackedRetVals;
}
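
(Aside: the DenseMap to MapVector swap matters because MapVector iterates in insertion order, so walks over TrackedRetVals become deterministic instead of depending on hash bucket layout. A tiny, purely illustrative demonstration:)

```cpp
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/raw_ostream.h"

static void demoDeterministicIteration() {
  llvm::MapVector<int, const char *> M;
  M.insert({3, "three"});
  M.insert({1, "one"});
  M.insert({2, "two"});
  for (auto &KV : M) // always visits 3, 1, 2: insertion order
    llvm::outs() << KV.first << " -> " << KV.second << "\n";
}
```
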
@@ -614,6 +614,7 @@ private:
void visitCastInst(CastInst &I);
void visitSelectInst(SelectInst &I);
+ void visitUnaryOperator(Instruction &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
@@ -639,6 +640,11 @@ private:
visitTerminator(II);
}
+ void visitCallBrInst (CallBrInst &CBI) {
+ visitCallSite(&CBI);
+ visitTerminator(CBI);
+ }
+
void visitCallSite (CallSite CS);
void visitResumeInst (ResumeInst &I) { /*returns void*/ }
void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
@@ -734,6 +740,13 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
return;
}
+ // In case of callbr, we pessimistically assume that all successors are
+ // feasible.
+ if (isa<CallBrInst>(&TI)) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -825,7 +838,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
// If we are tracking the return value of this function, merge it in.
if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
- DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ MapVector<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
if (TFRVI != TrackedRetVals.end()) {
mergeInValue(TFRVI->second, F, getValueState(ResultOp));
@@ -958,6 +971,29 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
markOverdefined(&I);
}
+// Handle Unary Operators.
+void SCCPSolver::visitUnaryOperator(Instruction &I) {
+ LatticeVal V0State = getValueState(I.getOperand(0));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V0State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V0State.getConstant());
+
+ // op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return (void)markConstant(IV, &I, C);
+ }
+
+ // If something is undef, wait for it to resolve.
+ if (!V0State.isOverdefined())
+ return;
+
+ markOverdefined(&I);
+}
+
// Handle Binary Operators.
void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal V1State = getValueState(I.getOperand(0));
@@ -1232,7 +1268,7 @@ CallOverdefined:
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
- canConstantFoldCallTo(CS, F)) {
+ canConstantFoldCallTo(cast<CallBase>(CS.getInstruction()), F)) {
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
@@ -1253,7 +1289,8 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) {
+ if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), F,
+ Operands, TLI)) {
// call -> undef.
if (isa<UndefValue>(C))
return;
@@ -1315,7 +1352,7 @@ CallOverdefined:
mergeInValue(getStructValueState(I, i), I,
TrackedMultipleRetVals[std::make_pair(F, i)]);
} else {
- DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ MapVector<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
if (TFRVI == TrackedRetVals.end())
goto CallOverdefined; // Not tracking this callee.
@@ -1472,6 +1509,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
else
markOverdefined(&I);
return true;
+ case Instruction::FNeg:
+ break; // fneg undef -> undef
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -1598,6 +1637,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
case Instruction::Call:
case Instruction::Invoke:
+ case Instruction::CallBr:
// There are two reasons a call can have an undef result
// 1. It could be tracked.
// 2. It could be constant-foldable.
@@ -2070,12 +2110,22 @@ bool llvm::runIPSCCP(
// If we have forced an edge for an indeterminate value, then force the
// terminator to fold to that edge.
forceIndeterminateEdge(I, Solver);
- bool Folded = ConstantFoldTerminator(I->getParent(),
+ BasicBlock *InstBB = I->getParent();
+ bool Folded = ConstantFoldTerminator(InstBB,
/*DeleteDeadConditions=*/false,
/*TLI=*/nullptr, &DTU);
assert(Folded &&
"Expect TermInst on constantint or blockaddress to be folded");
(void) Folded;
+ // If we folded the terminator to an unconditional branch to another
+ // dead block, replace it with Unreachable, to avoid trying to fold that
+ // branch again.
+ BranchInst *BI = cast<BranchInst>(InstBB->getTerminator());
+ if (BI && BI->isUnconditional() &&
+ !Solver.isBlockExecutable(BI->getSuccessor(0))) {
+ InstBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(InstBB->getContext(), InstBB);
+ }
}
// Mark dead BB for deletion.
DTU.deleteBB(DeadBB);
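
(Aside: the branch-to-unreachable step added above reads roughly like the following sketch; the liveness predicate is abstracted away and the names are assumptions.)

```cpp
#include "llvm/ADT/STLExtras.h" // function_ref
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static void foldBranchIntoDeadBlock(BasicBlock *BB,
                                    function_ref<bool(BasicBlock *)> IsLive) {
  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isUnconditional() || IsLive(BI->getSuccessor(0)))
    return;
  // The lone successor is dead as well; replace the branch so later folding
  // does not try to process it again.
  BI->eraseFromParent();
  new UnreachableInst(BB->getContext(), BB);
}
```
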
@@ -2109,7 +2159,7 @@ bool llvm::runIPSCCP(
// whether other functions are optimizable.
SmallVector<ReturnInst*, 8> ReturnsToZap;
- const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ const MapVector<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (const auto &I : RV) {
Function *F = I.first;
if (I.second.isOverdefined() || F->getReturnType()->isVoidTy())
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index eab77cf4cda9..33f90d0b01e4 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1,9 +1,8 @@
//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -222,13 +221,6 @@ public:
} // end anonymous namespace
-namespace llvm {
-
-template <typename T> struct isPodLike;
-template <> struct isPodLike<Slice> { static const bool value = true; };
-
-} // end namespace llvm
-
/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -721,6 +713,13 @@ private:
return Base::visitBitCastInst(BC);
}
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ if (ASC.use_empty())
+ return markAsDead(ASC);
+
+ return Base::visitAddrSpaceCastInst(ASC);
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
@@ -784,7 +783,10 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- const DataLayout &DL = LI.getModule()->getDataLayout();
+ if (LI.isVolatile() &&
+ LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -796,7 +798,10 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
- const DataLayout &DL = SI.getModule()->getDataLayout();
+ if (SI.isVolatile() &&
+ SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
@@ -831,6 +836,11 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this with a store with a different address space. TODO:
+ // Use a store with the casted new alloca?
+ if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&II);
+
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
@@ -850,6 +860,13 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this with a load/store with a different address space.
+ // TODO: Use a store with the casted new alloca?
+ if (II.isVolatile() &&
+ (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
+ II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
+ return PI.setAborted(&II);
+
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
@@ -957,7 +974,7 @@ private:
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
- !isa<SelectInst>(I)) {
+ !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
return I;
}
@@ -1173,12 +1190,16 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN.getParent();
unsigned MaxAlign = 0;
+ uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
+ APInt MaxSize(APWidth, 0);
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1197,15 +1218,15 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
if (BBI->mayWriteToMemory())
return false;
+ uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType());
MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
HaveLoad = true;
}
if (!HaveLoad)
return false;
- const DataLayout &DL = PN.getModule()->getDataLayout();
-
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
@@ -1227,7 +1248,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
continue;
return false;
@@ -1239,15 +1260,14 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
static void speculatePHINodeLoads(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
- Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
+ Type *LoadTy = SomeLoad->getType();
IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the AA tags and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ.
- LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
-
AAMDNodes AATags;
SomeLoad->getAAMetadata(AATags);
unsigned Align = SomeLoad->getAlignment();
@@ -1278,7 +1298,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
- InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
+ LoadTy, InVal,
+ (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
Load->setAlignment(Align);
if (AATags)
@@ -1317,9 +1338,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
// Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
- if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
+ if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(),
+ DL, LI))
return false;
- if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
+ if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(),
+ DL, LI))
return false;
}
@@ -1338,10 +1361,10 @@ static void speculateSelectInstLoads(SelectInst &SI) {
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
- LoadInst *TL =
- IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL =
- IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+ LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+ LI->getName() + ".sroa.speculate.load.true");
+ LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+ LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
// Transfer alignment and AA info if present.
@@ -1379,8 +1402,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
- NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
+ BasePtr, Indices, NamePrefix + "sroa_idx");
}
/// Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1569,7 +1592,14 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
- Type *TargetTy = PointerTy->getPointerElementType();
+ PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
+ Type *TargetTy = TargetPtrTy->getElementType();
+
+  // As `addrspacecast` is allowed here, `Ptr` (the storage pointer) may have
+  // a different address space from the expected `PointerTy` (the pointer to
+  // be used). Adjust the pointer type based on the original storage pointer.
+ auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerTy = TargetTy->getPointerTo(AS);
do {
// First fold any existing GEPs into the offset.
@@ -1599,7 +1629,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetBasePtr = Ptr;
// If we also found a pointer of the right type, we're done.
if (P->getType() == PointerTy)
- return P;
+ break;
}
// Stash this pointer if we've found an i8*.
@@ -1638,8 +1668,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
- if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
+ if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
+ Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
+ TargetPtrTy,
+ NamePrefix + "sroa_cast");
+ }
return Ptr;
}
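
Because getAdjustedPtr can now be handed a pointer in a different address space than the requested PointerTy, the final cast above uses CreatePointerBitCastOrAddrSpaceCast. A standalone sketch of how that builder call behaves; address space 5 and the data layout string are illustrative assumptions, not from the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void addrSpaceCastSketch() {
  LLVMContext Ctx;
  Module M("sketch", Ctx);
  M.setDataLayout("A5"); // allocas live in address space 5 (AMDGPU-like)
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), false),
      Function::ExternalLinkage, "g", &M);
  IRBuilder<> IRB(BasicBlock::Create(Ctx, "entry", F));

  Value *P = IRB.CreateAlloca(IRB.getInt32Ty(), /*AddrSpace=*/5, nullptr, "p");

  // Same address space, different pointee type: folds to a plain bitcast.
  IRB.CreatePointerBitCastOrAddrSpaceCast(P, IRB.getInt8Ty()->getPointerTo(5),
                                          "p.i8");
  // Different address space: an addrspacecast is emitted instead.
  IRB.CreatePointerBitCastOrAddrSpaceCast(P, IRB.getInt8Ty()->getPointerTo(0),
                                          "p.flat");
  IRB.CreateRetVoid();
}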
@@ -2418,14 +2451,16 @@ private:
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
@@ -2469,7 +2504,8 @@ private:
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
- LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
@@ -2505,9 +2541,9 @@ private:
}
} else {
Type *LTy = TargetTy->getPointerTo(AS);
- LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
- getSliceAlign(TargetTy),
- LI.isVolatile(), LI.getName());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(
+ TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy),
+ LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
if (LI.isVolatile())
@@ -2524,8 +2560,7 @@ private:
"Only integer type loads and stores are split");
assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
- assert(LI.getType()->getIntegerBitWidth() ==
- DL.getTypeStoreSizeInBits(LI.getType()) &&
+ assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
@@ -2533,8 +2568,8 @@ private:
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
- Value *Placeholder =
- new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
+ Value *Placeholder = new LoadInst(
+ LI.getType(), UndefValue::get(LI.getType()->getPointerTo(AS)));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
@@ -2565,7 +2600,8 @@ private:
V = convertValue(DL, IRB, V, SliceTy);
// Mix in the existing elements.
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2581,8 +2617,8 @@ private:
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
@@ -2619,8 +2655,7 @@ private:
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(V->getType()->getIntegerBitWidth() ==
- DL.getTypeStoreSizeInBits(V->getType()) &&
+ assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
@@ -2731,15 +2766,26 @@ private:
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
+
+ const bool CanContinue = [&]() {
+ if (VecTy || IntTy)
+ return true;
+ if (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)
+ return false;
+ auto *C = cast<ConstantInt>(II.getLength());
+ if (C->getBitWidth() > 64)
+ return false;
+ const auto Len = C->getZExtValue();
+ auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
+ auto *SrcTy = VectorType::get(Int8Ty, Len);
+ return canConvertValue(DL, SrcTy, AllocaTy) &&
+ DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy));
+ }();
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
- if (!VecTy && !IntTy &&
- (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
- SliceSize != DL.getTypeStoreSize(AllocaTy) ||
- !AllocaTy->isSingleValueType() ||
- !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
- DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) {
+ if (!CanContinue) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
@@ -2774,8 +2820,8 @@ private:
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
@@ -2787,8 +2833,8 @@ private:
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset)) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, V, Offset, "insert");
@@ -2948,18 +2994,18 @@ private:
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
+ Type *OtherTy;
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
- OtherPtrTy = VecTy->getElementType();
+ OtherTy = VecTy->getElementType();
else
- OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
-
- OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
+ OtherTy = VectorType::get(VecTy->getElementType(), NumElements);
} else if (IntTy && !IsWholeAlloca) {
- OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+ OtherTy = SubIntTy;
} else {
- OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
+ OtherTy = NewAllocaTy;
}
+ OtherPtrTy = OtherTy->getPointerTo(OtherAS);
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
@@ -2973,28 +3019,30 @@ private:
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
- Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
- Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
Src = convertValue(DL, IRB, Src, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
- LoadInst *Load = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
- "copyload");
+ LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
+ II.isVolatile(), "copyload");
if (AATags)
Load->setAAMetadata(AATags);
Src = Load;
}
if (VecTy && !IsWholeAlloca && IsDest) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
@@ -3031,7 +3079,10 @@ private:
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
- Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+ // Lifetime intrinsics always expect an i8* so directly get such a pointer
+ // for the new alloca slice.
+ Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+ Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -3072,8 +3123,9 @@ private:
continue;
}
- assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
+ isa<PHINode>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I));
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(cast<Instruction>(U));
@@ -3297,8 +3349,8 @@ private:
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
+ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
+ LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3342,7 +3394,7 @@ private:
Value *ExtractValue =
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
+ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
if (AATags)
@@ -3374,6 +3426,11 @@ private:
return false;
}
+ bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ return false;
+ }
+
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
enqueueUsers(GEPI);
return false;
@@ -3792,6 +3849,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
+ PartTy,
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
@@ -3933,6 +3991,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
IRB.SetInsertPoint(LI);
auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
+ PartTy,
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 976daf4c78c2..869cf00e0a89 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -1,9 +1,8 @@
//===-- Scalar.cpp --------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,6 +62,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
+ initializeLoopFuseLegacyPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);
@@ -81,8 +81,9 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+ initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
- initializeMergeICmpsPass(Registry);
+ initializeMergeICmpsLegacyPassPass(Registry);
initializeMergedLoadStoreMotionLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 5eb3fdab6d5c..2ee1a3a95f2a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -1,9 +1,8 @@
//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,6 +124,18 @@ struct ICmpSplitter {
ICmpInst &ICI;
};
+// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
+// a unary operator like UO called Name with operand X.
+struct UnarySplitter {
+ UnarySplitter(UnaryOperator &uo) : UO(uo) {}
+
+ Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
+ return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
+ }
+
+ UnaryOperator &UO;
+};
+
// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
@@ -174,6 +185,7 @@ public:
bool visitSelectInst(SelectInst &SI);
bool visitICmpInst(ICmpInst &ICI);
bool visitFCmpInst(FCmpInst &FCI);
+ bool visitUnaryOperator(UnaryOperator &UO);
bool visitBinaryOperator(BinaryOperator &BO);
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
bool visitCastInst(CastInst &CI);
@@ -188,11 +200,12 @@ private:
Scatterer scatter(Instruction *Point, Value *V);
void gather(Instruction *Op, const ValueVector &CV);
bool canTransferMetadata(unsigned Kind);
- void transferMetadata(Instruction *Op, const ValueVector &CV);
+ void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
const DataLayout &DL);
bool finish();
+ template<typename T> bool splitUnary(Instruction &, const T &);
template<typename T> bool splitBinary(Instruction &, const T &);
bool splitCall(CallInst &CI);
@@ -246,14 +259,13 @@ Value *Scatterer::operator[](unsigned I) {
return CV[I];
IRBuilder<> Builder(BB, BBI);
if (PtrTy) {
+ Type *ElTy = PtrTy->getElementType()->getVectorElementType();
if (!CV[0]) {
- Type *Ty =
- PointerType::get(PtrTy->getElementType()->getVectorElementType(),
- PtrTy->getAddressSpace());
- CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+ Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
+ CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -349,7 +361,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
- transferMetadata(Op, CV);
+ transferMetadataAndIRFlags(Op, CV);
// If we already have a scattered form of Op (created from ExtractElements
// of Op itself), replace them with the new form.
@@ -385,7 +397,8 @@ bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
-void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
+ const ValueVector &CV) {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
@@ -393,6 +406,7 @@ void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV)
for (const auto &MD : MDs)
if (canTransferMetadata(MD.first))
New->setMetadata(MD.first, MD.second);
+ New->copyIRFlags(Op);
if (Op->getDebugLoc() && !New->getDebugLoc())
New->setDebugLoc(Op->getDebugLoc());
}
@@ -410,8 +424,7 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
// Check that we're dealing with full-byte elements.
Layout.ElemTy = Layout.VecTy->getElementType();
- if (DL.getTypeSizeInBits(Layout.ElemTy) !=
- DL.getTypeStoreSizeInBits(Layout.ElemTy))
+ if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
return false;
if (Alignment)
@@ -422,6 +435,26 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
return true;
}
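
For reference on the check above: DataLayout::typeSizeEqualsStoreSize(Ty) is equivalent to the comparison it replaces, getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty). It holds for byte-multiple types such as i32 (32 == 32 bits) but not for i1 or i20, whose store sizes round up to 8 and 24 bits respectively.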
+// Scalarize one-operand instruction I, using Split(Builder, X, Name)
+// to create an instruction like I with operand X and name Name.
+template<typename Splitter>
+bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
+ VectorType *VT = dyn_cast<VectorType>(I.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(&I);
+ Scatterer Op = scatter(&I, I.getOperand(0));
+ assert(Op.size() == NumElems && "Mismatched unary operation");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+ Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
+ gather(&I, Res);
+ return true;
+}
+
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
@@ -554,6 +587,10 @@ bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
return splitBinary(FCI, FCmpSplitter(FCI));
}
+bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
+ return splitUnary(UO, UnarySplitter(UO));
+}
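
The new unary path bottoms out in IRBuilder::CreateUnOp, invoked once per extracted lane by UnarySplitter. A minimal standalone sketch of that call under the LLVM C++ API at this revision; the wrapper function and its single scalar argument are illustrative:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void scalarFNegSketch() {
  LLVMContext Ctx;
  Module M("sketch", Ctx);
  Type *FloatTy = Type::getFloatTy(Ctx);
  Function *F = Function::Create(
      FunctionType::get(FloatTy, {FloatTy}, /*isVarArg=*/false),
      Function::ExternalLinkage, "neg", &M);
  IRBuilder<> IRB(BasicBlock::Create(Ctx, "entry", F));

  // This is the call UnarySplitter::operator() forwards to for each lane of
  // the original vector operation (here shown as a single scalar fneg).
  Value *R = IRB.CreateUnOp(Instruction::FNeg, &*F->arg_begin(), "x.neg");
  IRB.CreateRet(R);
}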
+
bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
return splitBinary(BO, BinarySplitter(BO));
}
@@ -744,7 +781,8 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
Res.resize(NumElems);
for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+ Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
+ Layout.getElemAlign(I),
LI.getName() + ".i" + Twine(I));
gather(&LI, Res);
return true;
@@ -773,7 +811,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
unsigned Align = Layout.getElemAlign(I);
Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
}
- transferMetadata(&SI, Stores);
+ transferMetadataAndIRFlags(&SI, Stores);
return true;
}
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 967f4a42a8fb..f6a12fb13142 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1,9 +1,8 @@
//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5a67178cef37..aeac6f548b32 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1,9 +1,8 @@
///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -181,14 +180,9 @@ static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
BasicBlock &UnswitchedSucc,
BasicBlock &NormalSucc) {
IRBuilder<> IRB(&BB);
- Value *Cond = Invariants.front();
- for (Value *Invariant :
- make_range(std::next(Invariants.begin()), Invariants.end()))
- if (Direction)
- Cond = IRB.CreateOr(Cond, Invariant);
- else
- Cond = IRB.CreateAnd(Cond, Invariant);
-
+
+ Value *Cond = Direction ? IRB.CreateOr(Invariants) :
+ IRB.CreateAnd(Invariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
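
buildPartialUnswitchConditionalBranch now folds the whole invariant list with one builder call instead of a manual pairwise loop. A small standalone sketch of the ArrayRef overload it relies on; the wrapper function and its arguments are illustrative assumptions:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void combineInvariantsSketch() {
  LLVMContext Ctx;
  Module M("sketch", Ctx);
  Type *BoolTy = Type::getInt1Ty(Ctx);
  Function *F = Function::Create(
      FunctionType::get(BoolTy, {BoolTy, BoolTy, BoolTy}, false),
      Function::ExternalLinkage, "any_invariant", &M);
  IRBuilder<> IRB(BasicBlock::Create(Ctx, "entry", F));

  SmallVector<Value *, 4> Invariants;
  for (Argument &A : F->args())
    Invariants.push_back(&A);

  // One call replaces the hand-rolled fold over the invariant list that the
  // removed code built up one CreateOr/CreateAnd at a time.
  Value *Cond = IRB.CreateOr(Invariants);
  IRB.CreateRet(Cond);
}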
@@ -268,7 +262,8 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
/// loops reachable and need to move the current loop up the loop nest or even
/// to an entirely separate nest.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
- DominatorTree &DT, LoopInfo &LI) {
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU) {
// If the loop is already at the top level, we can't hoist it anywhere.
Loop *OldParentL = L.getParentLoop();
if (!OldParentL)
@@ -329,7 +324,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// unswitching it is possible to get new non-dedicated exits out of parent
// loop so let's conservatively form dedicated exit blocks and figure out
// if we can optimize later.
- formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
+ formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
+ /*PreserveLCSSA*/ true);
}
}
@@ -536,7 +532,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If this was full unswitching, we may have changed the nesting relationship
// for this loop so hoist it to its correct parent if needed.
if (FullUnswitch)
- hoistLoopToNewParent(L, *NewPH, DT, LI);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
++NumTrivial;
@@ -590,11 +589,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
ExitCaseIndices.push_back(Case.getCaseIndex());
}
BasicBlock *DefaultExitBB = nullptr;
+ SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
+ SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
if (!L.contains(SI.getDefaultDest()) &&
areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
- !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+ !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) {
DefaultExitBB = SI.getDefaultDest();
- else if (ExitCaseIndices.empty())
+ } else if (ExitCaseIndices.empty())
return false;
LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
@@ -618,8 +619,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Store the exit cases into a separate data structure and remove them from
// the switch.
- SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+ SmallVector<std::tuple<ConstantInt *, BasicBlock *,
+ SwitchInstProfUpdateWrapper::CaseWeightOpt>,
+ 4> ExitCases;
ExitCases.reserve(ExitCaseIndices.size());
+ SwitchInstProfUpdateWrapper SIW(SI);
// We walk the case indices backwards so that we remove the last case first
// and don't disrupt the earlier indices.
for (unsigned Index : reverse(ExitCaseIndices)) {
@@ -629,9 +633,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
if (!ExitL || ExitL->contains(OuterL))
OuterL = ExitL;
// Save the value of this case.
- ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+ auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
+ ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
// Delete the unswitched cases.
- SI.removeCase(CaseI);
+ SIW.removeCase(CaseI);
}
if (SE) {
@@ -669,6 +674,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Now add the unswitched switch.
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+ SwitchInstProfUpdateWrapper NewSIW(*NewSI);
// Rewrite the IR for the unswitched basic blocks. This requires two steps.
// First, we split any exit blocks with remaining in-loop predecessors. Then
@@ -696,9 +702,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
}
// Note that we must use a reference in the for loop so that we update the
// container.
- for (auto &CasePair : reverse(ExitCases)) {
+ for (auto &ExitCase : reverse(ExitCases)) {
// Grab a reference to the exit block in the pair so that we can update it.
- BasicBlock *ExitBB = CasePair.second;
+ BasicBlock *ExitBB = std::get<1>(ExitCase);
// If this case is the last edge into the exit block, we can simply reuse it
// as it will no longer be a loop exit. No mapping necessary.
@@ -720,27 +726,39 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
/*FullUnswitch*/ true);
}
// Update the case pair to point to the split block.
- CasePair.second = SplitExitBB;
+ std::get<1>(ExitCase) = SplitExitBB;
}
// Now add the unswitched cases. We do this in reverse order as we built them
// in reverse order.
- for (auto CasePair : reverse(ExitCases)) {
- ConstantInt *CaseVal = CasePair.first;
- BasicBlock *UnswitchedBB = CasePair.second;
+ for (auto &ExitCase : reverse(ExitCases)) {
+ ConstantInt *CaseVal = std::get<0>(ExitCase);
+ BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
- NewSI->addCase(CaseVal, UnswitchedBB);
+ NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
}
// If the default was unswitched, re-point it and add explicit cases for
// entering the loop.
if (DefaultExitBB) {
- NewSI->setDefaultDest(DefaultExitBB);
+ NewSIW->setDefaultDest(DefaultExitBB);
+ NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
// We removed all the exit cases, so we just copy the cases to the
// unswitched switch.
- for (auto Case : SI.cases())
- NewSI->addCase(Case.getCaseValue(), NewPH);
+ for (const auto &Case : SI.cases())
+ NewSIW.addCase(Case.getCaseValue(), NewPH,
+ SIW.getSuccessorWeight(Case.getSuccessorIndex()));
+ } else if (DefaultCaseWeight) {
+ // We have to set branch weight of the default case.
+ uint64_t SW = *DefaultCaseWeight;
+ for (const auto &Case : SI.cases()) {
+ auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
+ assert(W &&
+ "case weight must be defined as default case weight is defined");
+ SW += *W;
+ }
+ NewSIW.setSuccessorWeight(0, SW);
}
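As a worked example of the weight bookkeeping (illustrative numbers, not from the patch): if the original switch carried !prof weights of 10 for the default, 4 for an in-loop case, and 6 for an exit case, the exit case moves to the unswitched switch with weight 6, while the unswitched default edge, which now funnels both the old default and the remaining in-loop case into the loop, is given 10 + 4 = 14.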
// If we ended up with a common successor for every path through the switch
@@ -762,10 +780,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
continue;
}
CommonSuccBB->removePredecessor(BB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
}
// Now nuke the switch and replace it with a direct branch.
- SI.eraseFromParent();
+ SIW.eraseFromParent();
BranchInst::Create(CommonSuccBB, BB);
} else if (DefaultExitBB) {
assert(SI.getNumCases() > 0 &&
@@ -775,8 +793,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// being simple and keeping the number of edges from this switch to
// successors the same, and avoiding any PHI update complexity.
auto LastCaseI = std::prev(SI.case_end());
+
SI.setDefaultDest(LastCaseI->getCaseSuccessor());
- SI.removeCase(LastCaseI);
+ SIW.setSuccessorWeight(
+ 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
+ SIW.removeCase(LastCaseI);
}
// Walk the unswitched exit blocks and the unswitched split blocks and update
@@ -789,9 +810,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
}
for (auto SplitUnswitchedPair : SplitExitBBMap) {
- auto *UnswitchedBB = SplitUnswitchedPair.second;
- DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
- DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
+ DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
+ DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
}
DT.applyUpdates(DTUpdates);
@@ -805,7 +825,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// We may have changed the nesting relationship for this loop so hoist it to
// its correct parent if needed.
- hoistLoopToNewParent(L, *NewPH, DT, LI);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
++NumTrivial;
++NumSwitches;
@@ -848,6 +871,10 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Check if there are any side-effecting instructions (e.g. stores, calls,
// volatile loads) in the part of the loop that the code *would* execute
// without unswitching.
+ if (MSSAU) // Possible early exit with MSSA
+ if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
+ if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
+ return Changed;
if (llvm::any_of(*CurrentBB,
[](Instruction &I) { return I.mayHaveSideEffects(); }))
return Changed;
@@ -1066,7 +1093,7 @@ static BasicBlock *buildClonedLoopBlocks(
continue;
ClonedSuccBB->removePredecessor(ClonedParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
}
// Replace the cloned branch with an unconditional branch to the cloned
@@ -1436,8 +1463,8 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
// Remove all MemorySSA in the dead blocks
if (MSSAU) {
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
- DeadBlocks.end());
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
+ DeadBlocks.end());
MSSAU->removeBlocks(DeadBlockSet);
}
@@ -1455,7 +1482,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
MemorySSAUpdater *MSSAU) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet;
// Start with loop/exit blocks and get a transitive closure of reachable dead
// blocks.
@@ -1712,10 +1739,9 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
// Sort the exits in ascending loop depth, we'll work backwards across these
// to process them inside out.
- std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
- [&](BasicBlock *LHS, BasicBlock *RHS) {
- return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
- });
+ llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
+ return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
+ });
// We'll build up a set for each exit loop.
SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
@@ -2075,7 +2101,7 @@ static void unswitchNontrivialInvariants(
"Only one possible unswitched block for a branch!");
BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
UnswitchedSuccBB->removePredecessor(ParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
} else {
// Note that we actually want to remove the parent block as a predecessor
@@ -2090,7 +2116,7 @@ static void unswitchNontrivialInvariants(
for (auto &Case : NewSI->cases())
Case.getCaseSuccessor()->removePredecessor(
ParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
@@ -2236,7 +2262,7 @@ static void unswitchNontrivialInvariants(
// introduced new, non-dedicated exits. At least try to re-form dedicated
// exits for these loops. This may fail if they couldn't have dedicated
// exits to start with.
- formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
+ formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
};
// For non-child cloned loops and hoisted loops, we just need to update LCSSA
@@ -2526,7 +2552,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// We can only consider fully loop-invariant switch conditions as we need
// to completely eliminate the switch after unswitching.
if (!isa<Constant>(SI->getCondition()) &&
- L.isLoopInvariant(SI->getCondition()))
+ L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
UnswitchCandidates.push_back({SI, {SI->getCondition()}});
continue;
}
@@ -2852,7 +2878,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
- return getLoopPassPreservedAnalyses();
+
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b7b1db76b492..4544975a4887 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -1,9 +1,8 @@
//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c99da8f0737a..90f3a2aa46e1 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -1,9 +1,8 @@
//===-- Sink.cpp - Code Sinking -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index c0f75ddddbe0..c13fb3e04516 100644
--- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -1,9 +1,8 @@
//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -68,6 +67,14 @@ isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
return false;
}
+ if (auto CS = ImmutableCallSite(UI)) {
+ if (CS.isConvergent() || CS.cannotDuplicate()) {
+ LLVM_DEBUG(dbgs() << " Unsafe: convergent "
+ "callsite cannot de duplicated: " << *UI << '\n');
+ return false;
+ }
+ }
+
// FIXME: This check is much too conservative. We're not going to move these
// instructions onto new dynamic paths through the program unless there is
// a call instruction between the use and the PHI node. And memory isn't
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f5e1dd6ed850..f9d027eb4a3b 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -1,9 +1,8 @@
//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -241,6 +240,7 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
+ case Instruction::FNeg:
case Instruction::ICmp:
case Instruction::FCmp:
return TTI.getUserCost(I);
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b5089b006bdd..a58c32cc5894 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -1,9 +1,8 @@
//===- StraightLineStrengthReduce.cpp - -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -683,9 +682,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
if (InBounds)
- Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ Reduced = Builder.CreateInBoundsGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump);
else
- Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
+ Reduced = Builder.CreateGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump);
}
break;
}
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0db762d846f2..e5400676c7e8 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1,9 +1,8 @@
//===- StructurizeCFG.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -63,6 +62,11 @@ static cl::opt<bool> ForceSkipUniformRegions(
cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
cl::init(false));
+static cl::opt<bool>
+ RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
+ cl::desc("Allow relaxed uniform region checks"),
+ cl::init(false));
+
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -624,11 +628,8 @@ void StructurizeCFG::setPhiValues() {
if (!Dominator.resultIsRememberedBlock())
Updater.AddAvailableValue(Dominator.result(), Undef);
- for (BasicBlock *FI : From) {
- int Idx = Phi->getBasicBlockIndex(FI);
- assert(Idx != -1);
- Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
- }
+ for (BasicBlock *FI : From)
+ Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
}
DeletedPhis.erase(To);
@@ -937,6 +938,11 @@ void StructurizeCFG::rebuildSSA() {
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
const LegacyDivergenceAnalysis &DA) {
+ // Whether all sub-regions are uniform.
+ bool SubRegionsAreUniform = true;
+ // Count of how many direct children are conditional.
+ unsigned ConditionalDirectChildren = 0;
+
for (auto E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -945,6 +951,10 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
if (!DA.isUniform(Br))
return false;
+
+ // One of our direct children is conditional.
+ ConditionalDirectChildren++;
+
LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
<< " has uniform terminator\n");
} else {
@@ -962,12 +972,25 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
if (!Br || !Br->isConditional())
continue;
- if (!Br->getMetadata(UniformMDKindID))
- return false;
+ if (!Br->getMetadata(UniformMDKindID)) {
+ // Early exit if we cannot have relaxed uniform regions.
+ if (!RelaxedUniformRegions)
+ return false;
+
+ SubRegionsAreUniform = false;
+ break;
+ }
}
}
}
- return true;
+
+ // Our region is uniform if:
+ // 1. All conditional branches that are direct children are uniform (checked
+ // above).
+ // 2. And either:
+ // a. All sub-regions are uniform.
+ // b. There is at most one conditional branch among the direct children.
+ return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
}
/// Run the transformation for each region found
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0f6db21f73b6..f0b79079d817 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -1,9 +1,8 @@
//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -69,7 +69,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
@@ -341,7 +340,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
// being loaded from.
const DataLayout &DL = L->getModule()->getDataLayout();
if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
- !isSafeToLoadUnconditionally(L->getPointerOperand(),
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
L->getAlignment(), DL, L))
return false;
}
@@ -679,7 +678,7 @@ static bool eliminateRecursiveTailCall(
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
- DTU.insertEdge(BB, OldEntry);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, OldEntry}});
++NumEliminated;
return true;
}
diff --git a/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80f761e53774..707adf46d1f4 100644
--- a/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -1,9 +1,8 @@
//===- LoopTransformWarning.cpp - ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,7 +92,7 @@ PreservedAnalyses
WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
// Do not warn about not applied transformations if optimizations are
// disabled.
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
return PreservedAnalyses::all();
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 364878dc588d..01912297324a 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -1,9 +1,8 @@
//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,7 +62,7 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
for (size_t i = 0; i < NumVars; i++)
Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
- std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+ llvm::stable_sort(Vars, CompareVars);
ASanStackFrameLayout Layout;
Layout.Granularity = Granularity;
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 564537af0c2a..ee0973002c47 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -1,9 +1,8 @@
//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -209,7 +208,7 @@ static bool addDiscriminators(Function &F) {
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+ auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
if (!NewDIL) {
LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
<< DIL->getFilename() << ":" << DIL->getLine() << ":"
@@ -246,7 +245,7 @@ static bool addDiscriminators(Function &F) {
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
- auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+ auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
if (!NewDIL) {
LLVM_DEBUG(dbgs()
<< "Could not encode discriminator: "
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7da768252fc1..5fa371377c85 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,9 +1,8 @@
//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -26,7 +26,6 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -39,6 +38,8 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
@@ -48,30 +49,20 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
- SmallVector<BasicBlock *, 1> BBs = {BB};
- DeleteDeadBlocks(BBs, DTU);
-}
-
-void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
- DomTreeUpdater *DTU) {
-#ifndef NDEBUG
- // Make sure that all predecessors of each dead block is also dead.
- SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
- assert(Dead.size() == BBs.size() && "Duplicating blocks?");
- for (auto *BB : Dead)
- for (BasicBlock *Pred : predecessors(BB))
- assert(Dead.count(Pred) && "All predecessors must be dead!");
-#endif
+#define DEBUG_TYPE "basicblock-utils"
- SmallVector<DominatorTree::UpdateType, 4> Updates;
+void llvm::DetatchDeadBlocks(
+ ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs) {
for (auto *BB : BBs) {
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
+ SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
for (BasicBlock *Succ : successors(BB)) {
- Succ->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+ Succ->removePredecessor(BB, KeepOneInputPHIs);
+ if (Updates && UniqueSuccessors.insert(Succ).second)
+ Updates->push_back({DominatorTree::Delete, BB, Succ});
}
// Zap all the instructions in the block.
@@ -92,8 +83,29 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
}
+}
+
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
+}
+
+void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+#ifndef NDEBUG
+ // Make sure that all predecessors of each dead block are also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
for (BasicBlock *BB : BBs)
if (DTU)
@@ -102,6 +114,28 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
BB->eraseFromParent();
}
+bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ df_iterator_default_set<BasicBlock*> Reachable;
+
+ // Mark all reachable blocks.
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
+
+ // Collect all dead blocks.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(&*I)) {
+ BasicBlock *BB = &*I;
+ DeadBlocks.push_back(BB);
+ }
+
+ // Delete the dead blocks.
+ DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
+
+ return !DeadBlocks.empty();
+}
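
A sketch of how the new helper is meant to be driven from a pass when a dominator tree is at hand; the wrapper name is illustrative and only the signature added above is assumed:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

static bool removeUnreachableSketch(Function &F, DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  // KeepOneInputPHIs leaves single-input PHIs in the surviving successors
  // instead of folding them away when a predecessor is removed.
  return EliminateUnreachableBlocks(F, &DTU, /*KeepOneInputPHIs=*/true);
}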
+
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceResults *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
@@ -160,6 +194,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (IncValue == &PN)
return false;
+ LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+ << PredBB->getName() << "\n");
+
// Begin by getting rid of unneeded PHIs.
SmallVector<AssertingVH<Value>, 4> IncomingValues;
if (isa<PHINode>(BB->front())) {
@@ -175,11 +212,19 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
Updates.reserve(1 + (2 * succ_size(BB)));
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ // Add insert edges first. Experimentally, for the particular case of two
+ // blocks that can be merged, with a single successor and single predecessor
+ // respectively, it is beneficial to have all insert updates first. Deleting
+ // edges first may lead to unreachable blocks, followed by inserting edges
+ // making the blocks reachable again. Such DT updates lead to high compile
+ // times. We add inserts before deletes here to reduce compile time.
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ // This successor of BB may already have PredBB as a predecessor.
+ if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
+ Updates.push_back({DominatorTree::Insert, PredBB, *I});
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
Updates.push_back({DominatorTree::Delete, BB, *I});
- Updates.push_back({DominatorTree::Insert, PredBB, *I});
- }
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
if (MSSAU)
@@ -227,7 +272,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
isa<UnreachableInst>(BB->getTerminator()) &&
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
}
@@ -534,7 +579,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
- BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+ // Splitting the predecessors of a loop header creates a preheader block.
+ if (LI && LI->isLoopHeader(BB))
+ // Using the loop start line number prevents debuggers stepping into the
+ // loop body for this instruction.
+ BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+ else
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -543,6 +594,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// all BlockAddress uses would need to be updated.
assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
"Cannot split an edge from an IndirectBrInst");
+ assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from a CallBrInst");
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
}
@@ -711,7 +764,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
UncondBranch->eraseFromParent();
if (DTU)
- DTU->deleteEdge(Pred, BB);
+ DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
return cast<ReturnInst>(NewRet);
}
@@ -720,18 +773,23 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
Instruction *CheckTerm;
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ bool CreateThenBlock = (ThenBlock == nullptr);
+ if (CreateThenBlock) {
+ ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ } else
+ CheckTerm = ThenBlock->getTerminator();
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
@@ -746,7 +804,10 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
DT->changeImmediateDominator(Child, NewNode);
// Head dominates ThenBlock.
- DT->addNewBlock(ThenBlock, Head);
+ if (CreateThenBlock)
+ DT->addNewBlock(ThenBlock, Head);
+ else
+ DT->changeImmediateDominator(ThenBlock, Head);
}
}
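A usage sketch of the new optional parameter: here a caller reuses an existing trap block instead of letting the utility create one. Only the parameter list is taken from the hunk above; Cond, InsertPt, TrapBB and DT are assumed to exist in the caller.

    // With ThenBlock supplied, the utility only wires up the conditional
    // branch and the dominator info; the returned instruction is TrapBB's
    // existing terminator rather than a freshly created one.
    Instruction *ThenTerm = SplitBlockAndInsertIfThen(
        Cond, /*SplitBefore=*/InsertPt, /*Unreachable=*/true,
        /*BranchWeights=*/nullptr, DT, /*LI=*/nullptr, /*ThenBlock=*/TrapBB);
    (void)ThenTerm;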
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index fafc9aaba5c9..f5e4b53f6d97 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -1,9 +1,8 @@
//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -49,10 +49,14 @@ namespace {
bool runOnFunction(Function &F) override {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
unsigned N =
- SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
NumBroken += N;
return N > 0;
}
@@ -145,6 +149,14 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// it in this generic function.
if (DestBB->isEHPad()) return nullptr;
+ // Don't split the non-fallthrough edge from a callbr.
+ if (isa<CallBrInst>(TI) && SuccNum > 0)
+ return nullptr;
+
+ if (Options.IgnoreUnreachableDests &&
+ isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
+ return nullptr;
+
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -189,7 +201,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
+ DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
@@ -198,16 +210,17 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
+ auto *PDT = Options.PDT;
auto *LI = Options.LI;
auto *MSSAU = Options.MSSAU;
if (MSSAU)
MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
- if (!DT && !LI)
+ if (!DT && !PDT && !LI)
return NewBB;
- if (DT) {
+ if (DT || PDT) {
// Update the DominatorTree.
// ---> NewBB -----\
// / V
@@ -223,7 +236,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
- DT->applyUpdates(Updates);
+ if (DT)
+ DT->applyUpdates(Updates);
+ if (PDT)
+ PDT->applyUpdates(Updates);
}
// Update LoopInfo if it is around.
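Sketch of the options plumbing the wrapper pass above now uses; DT, PDT and LI are assumed to be pointers that may individually be null, and the constructor order mirrors the call in the hunk.

    CriticalEdgeSplittingOptions Options(DT, LI, /*MSSAU=*/nullptr, PDT);
    // Both trees (when present) receive the same batched edge updates.
    unsigned NumBroken = SplitAllCriticalEdges(F, Options);
    (void)NumBroken;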
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 3466dedd3236..27f110e24f9c 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1,9 +1,8 @@
//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
using namespace llvm;
@@ -121,6 +121,13 @@ static bool setNonLazyBind(Function &F) {
return true;
}
+static bool setDoesNotFreeMemory(Function &F) {
+ if (F.hasFnAttribute(Attribute::NoFree))
+ return false;
+ F.addFnAttr(Attribute::NoFree);
+ return true;
+}
+
bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
const TargetLibraryInfo &TLI) {
Function *F = M->getFunction(Name);
@@ -136,6 +143,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
bool Changed = false;
+ if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
+ Changed |= setDoesNotFreeMemory(F);
+
if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
Changed |= setNonLazyBind(F);
@@ -790,95 +800,76 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strlen))
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+ ArrayRef<Type *> ParamTypes,
+ ArrayRef<Value *> Operands, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI,
+ bool IsVaArgs = false) {
+ if (!TLI->has(TheLibFunc))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- StringRef StrlenName = TLI->getName(LibFunc_strlen);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction(StrlenName, DL.getIntPtrType(Context),
- B.getInt8PtrTy());
- inferLibFuncAttributes(M, StrlenName, *TLI);
- CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), StrlenName);
- if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ StringRef FuncName = TLI->getName(TheLibFunc);
+ FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+ FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
+ inferLibFuncAttributes(M, FuncName, *TLI);
+ CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
-
return CI;
}
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strchr))
- return nullptr;
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
- StringRef StrChrName = TLI->getName(LibFunc_strchr);
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
- Constant *StrChr =
- M->getOrInsertFunction(StrChrName, I8Ptr, I8Ptr, I32Ty);
- inferLibFuncAttributes(M, StrChrName, *TLI);
- CallInst *CI = B.CreateCall(
- StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, StrChrName);
- if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
+ {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
}
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strncmp))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef StrNCmpName = TLI->getName(LibFunc_strncmp);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction(StrNCmpName, B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, StrNCmpName, *TLI);
- CallInst *CI = B.CreateCall(
- StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, StrNCmpName);
-
- if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
+ return emitLibCall(
+ LibFunc_strncmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
}
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc_strcpy))
- return nullptr;
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
- inferLibFuncAttributes(M, Name, *TLI);
- CallInst *CI =
- B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
- if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc_strncpy))
- return nullptr;
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
- Len->getType());
- inferLibFuncAttributes(M, Name, *TLI);
- CallInst *CI = B.CreateCall(
- StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, Name);
- if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
}
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -892,57 +883,115 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCpy = M->getOrInsertFunction(
+ FunctionCallee MemCpy = M->getOrInsertFunction(
"__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
DL.getIntPtrType(Context));
Dst = castToCStr(Dst, B);
Src = castToCStr(Src, B);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
- if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memchr))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef MemChrName = TLI->getName(LibFunc_memchr);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction(MemChrName, B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt32Ty(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MemChrName, *TLI);
- CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, MemChrName);
-
- if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
+ return emitLibCall(
+ LibFunc_memchr, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr, B), Val, Len}, B, TLI);
}
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memcmp))
- return nullptr;
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(
+ LibFunc_memcmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
- StringRef MemCmpName = TLI->getName(LibFunc_memcmp);
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction(MemCmpName, B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MemCmpName, *TLI);
- CallInst *CI = B.CreateCall(
- MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, MemCmpName);
-
- if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
+ return emitLibCall(
+ LibFunc_bcmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
- return CI;
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(
+ LibFunc_memccpy, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
+ {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+ Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
+ Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+ Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
+ /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy()},
+ {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strlcpy, Size->getType(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strlcat, Size->getType(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(
+ LibFunc_vsnprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
+ {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
+ {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
}
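To show what the consolidation buys, a hypothetical extra emitter written against the new static helper; emitStrNDup is not part of this patch and exists only to illustrate the pattern (LibFunc_strndup is assumed to be the matching TargetLibraryInfo entry).

    static Value *emitStrNDup(Value *Ptr, Value *Len, IRBuilder<> &B,
                              const TargetLibraryInfo *TLI) {
      Type *I8Ptr = B.getInt8PtrTy();
      // Lookup, attribute inference, call creation and calling-convention
      // propagation all live in emitLibCall now.
      return emitLibCall(LibFunc_strndup, I8Ptr, {I8Ptr, Len->getType()},
                         {castToCStr(Ptr, B), Len}, B, TLI);
    }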
/// Append a suffix to the function name according to the type of 'Op'.
@@ -966,8 +1015,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType());
+ FunctionCallee Callee =
+ M->getOrInsertFunction(Name, Op->getType(), Op->getType());
CallInst *CI = B.CreateCall(Callee, Op, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -976,7 +1025,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
CI->setAttributes(Attrs.removeAttribute(B.getContext(),
AttributeList::FunctionIndex,
Attribute::Speculatable));
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1009,11 +1059,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
appendTypeSuffix(Op1, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
- Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
- Op2->getType());
+ FunctionCallee Callee = M->getOrInsertFunction(
+ Name, Op1->getType(), Op1->getType(), Op2->getType());
CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1026,7 +1077,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef PutCharName = TLI->getName(LibFunc_putchar);
- Value *PutChar = M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
+ FunctionCallee PutChar =
+ M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
inferLibFuncAttributes(M, PutCharName, *TLI);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
@@ -1035,7 +1087,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
"chari"),
PutCharName);
- if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
@@ -1047,11 +1100,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef PutsName = TLI->getName(LibFunc_puts);
- Value *PutS =
+ FunctionCallee PutS =
M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
inferLibFuncAttributes(M, PutsName, *TLI);
CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
- if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
@@ -1063,15 +1117,16 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutcName = TLI->getName(LibFunc_fputc);
- Constant *F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), B.getInt32Ty(),
- File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutcName, *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1083,14 +1138,15 @@ Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
- Constant *F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1102,13 +1158,14 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsName = TLI->getName(LibFunc_fputs);
- Constant *F = M->getOrInsertFunction(
- FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutsName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1120,13 +1177,14 @@ Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
- Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1139,7 +1197,7 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc_fwrite);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1149,7 +1207,8 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
B.CreateCall(F, {castToCStr(Ptr, B), Size,
ConstantInt::get(DL.getIntPtrType(Context), 1), File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1162,12 +1221,13 @@ Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
Module *M = B.GetInsertBlock()->getModule();
StringRef MallocName = TLI->getName(LibFunc_malloc);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
+ FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(M, MallocName, *TLI);
CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
- if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1182,12 +1242,13 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
StringRef CallocName = TLI.getName(LibFunc_calloc);
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- Value *Calloc = M->getOrInsertFunction(CallocName, Attrs, B.getInt8PtrTy(),
- PtrType, PtrType);
+ FunctionCallee Calloc = M->getOrInsertFunction(
+ CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
inferLibFuncAttributes(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
- if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+ if (const auto *F =
+ dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1202,7 +1263,7 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1210,7 +1271,8 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1222,13 +1284,14 @@ Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
- Constant *F =
- M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
+ File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1240,14 +1303,15 @@ Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
Module *M = B.GetInsertBlock()->getModule();
StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
- Constant *F =
+ FunctionCallee F =
M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
CallInst *CI =
B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1261,7 +1325,7 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1269,7 +1333,8 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
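The remaining hunks in this file repeat one mechanical migration, distilled below as a file-local sketch; Name and Arg are parameters of the sketch, not identifiers from the patch.

    static CallInst *emitUnaryIntCall(StringRef Name, Value *Arg,
                                      IRBuilder<> &B) {
      Module *M = B.GetInsertBlock()->getModule();
      // getOrInsertFunction now returns a FunctionCallee ({FunctionType,
      // callee Value*}) instead of a bare Constant*.
      FunctionCallee Callee =
          M->getOrInsertFunction(Name, B.getInt32Ty(), Arg->getType());
      CallInst *CI = B.CreateCall(Callee, Arg, Name);
      // The callee value is reached through getCallee() before stripping casts.
      if (const Function *Fn =
              dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
        CI->setCallingConv(Fn->getCallingConv());
      return CI;
    }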
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index e7828af648a9..df299f673f65 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,9 +1,8 @@
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index e58ddcf34667..f04d76e70c0d 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -1,9 +1,8 @@
//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -367,8 +366,9 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
CastInst **RetBitCast) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
- // Set the called function of the call site to be the given callee.
- CS.setCalledFunction(Callee);
+ // Set the called function of the call site to be the given callee (but don't
+ // change the type).
+ cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
// Since the call site will no longer be direct, we must clear metadata that
// is only appropriate for indirect calls. This includes !prof and !callees
@@ -412,6 +412,15 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// Remove any incompatible attributes for the argument.
AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+
+ // If byval is used, this must be a pointer type, and the byval type must
+ // match the element type. Update it if present.
+ if (ArgAttrs.getByValType()) {
+ Type *NewTy = Callee->getParamByValType(ArgNo);
+ ArgAttrs.addByValAttr(
+ NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
+ }
+
NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
AttributeChanged = true;
} else
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index cf41fd2e14c0..455fcbb1cf98 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -1,9 +1,8 @@
//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8f8c601f5f13..1026c9d37038 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -1,9 +1,8 @@
//===- CloneFunction.cpp - Clone a function into another function ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,13 +15,13 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -740,12 +739,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
- assert(OrigLoop->getSubLoops().empty() &&
- "Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
+ DenseMap<Loop *, Loop *> LMap;
Loop *NewLoop = LI->AllocateLoop();
+ LMap[OrigLoop] = NewLoop;
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
else
@@ -765,14 +764,36 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
// Update DominatorTree.
DT->addNewBlock(NewPH, LoopDomBB);
+ for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
+ Loop *&NewLoop = LMap[CurLoop];
+ if (!NewLoop) {
+ NewLoop = LI->AllocateLoop();
+
+ // Establish the parent/child relationship.
+ Loop *OrigParent = CurLoop->getParentLoop();
+ assert(OrigParent && "Could not find the original parent loop");
+ Loop *NewParentLoop = LMap[OrigParent];
+ assert(NewParentLoop && "Could not find the new parent loop");
+
+ NewParentLoop->addChildLoop(NewLoop);
+ }
+ }
+
for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ Loop *CurLoop = LI->getLoopFor(BB);
+ Loop *&NewLoop = LMap[CurLoop];
+ assert(NewLoop && "Expecting new loop to be allocated");
+
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
VMap[BB] = NewBB;
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
+ if (BB == CurLoop->getHeader())
+ NewLoop->moveToHeader(NewBB);
- // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+ // Add DominatorTree node. After seeing all blocks, update to correct
+ // IDom.
DT->addNewBlock(NewBB, NewPH);
Blocks.push_back(NewBB);
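With the subloop restriction lifted, a caller can now clone an entire loop nest in one call; a sketch only, where InsertBefore, DomBB, OrigLoop, LI and DT are assumed to come from the calling transform.

    ValueToValueMapTy VMap;
    SmallVector<BasicBlock *, 8> NewBlocks;
    Loop *NewLoop = cloneLoopWithPreheader(InsertBefore, DomBB, OrigLoop, VMap,
                                           ".clone", LI, DT, NewBlocks);
    // Cloned instructions still reference the original values until remapped.
    remapInstructionsInBlocks(NewBlocks, VMap);
    (void)NewLoop;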
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 659993aa5478..7ddf59becba9 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -1,9 +1,8 @@
//===- CloneModule.cpp - Clone an entire module ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 25d4ae583ecc..fa6d3f8ae873 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1,9 +1,8 @@
//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -44,6 +44,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -67,6 +68,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "code-extractor"
@@ -207,6 +209,9 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
llvm_unreachable("Repeated basic blocks in extraction input");
}
+ LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
+ << '\n');
+
for (auto *BB : Result) {
if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
return {};
@@ -224,9 +229,11 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
// the subgraph which is being extracted.
for (auto *PBB : predecessors(BB))
if (!Result.count(PBB)) {
- LLVM_DEBUG(
- dbgs() << "No blocks in this region may have entries from "
- "outside the region except for the first block!\n");
+ LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
+ "outside the region except for the first block!\n"
+ << "Problematic source BB: " << BB->getName() << "\n"
+ << "Problematic destination BB: " << PBB->getName()
+ << "\n");
return {};
}
}
@@ -236,18 +243,20 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, bool AllowVarArgs,
- bool AllowAlloca, std::string Suffix)
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ bool AllowVarArgs, bool AllowAlloca,
+ std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AllowVarArgs(AllowVarArgs),
+ BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
Suffix(Suffix) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, std::string Suffix)
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AllowVarArgs(false),
+ BPI(BPI), AC(AC), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
/* AllowAlloca */ false)),
@@ -325,7 +334,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
if (dyn_cast<Constant>(MemAddr))
break;
Value *Base = MemAddr->stripInBoundsConstantOffsets();
- if (!dyn_cast<AllocaInst>(Base) || Base == AI)
+ if (!isa<AllocaInst>(Base) || Base == AI)
return false;
break;
}
@@ -401,11 +410,74 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
return CommonExitBlock;
}
+// Find the pair of lifetime markers for address 'Addr' that are either
+// defined inside the outline region or can legally be shrinkwrapped into the
+// outline region. If there are no other untracked uses of the address, return
+// the pair of markers if found; otherwise return an empty LifetimeMarkerInfo.
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+ BasicBlock *ExitBlock) const {
+ LifetimeMarkerInfo Info;
+
+ for (User *U : Addr->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // Do not handle the case where Addr has multiple start markers.
+ if (Info.LifeStart)
+ return {};
+ Info.LifeStart = IntrInst;
+ }
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (Info.LifeEnd)
+ return {};
+ Info.LifeEnd = IntrInst;
+ }
+ continue;
+ }
+ // Any other user is an untracked use of the address; bail.
+ if (!definedInRegion(Blocks, U))
+ return {};
+ }
+
+ if (!Info.LifeStart || !Info.LifeEnd)
+ return {};
+
+ Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+ Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+ // Do legality check.
+ if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+ !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ return {};
+
+ // Check to see if we have a place to do hoisting, if not, bail.
+ if (Info.HoistLifeEnd && !ExitBlock)
+ return {};
+
+ return Info;
+}
+
void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
BasicBlock *&ExitBlock) const {
Function *Func = (*Blocks.begin())->getParent();
ExitBlock = getCommonExitBlock(Blocks);
+ auto moveOrIgnoreLifetimeMarkers =
+ [&](const LifetimeMarkerInfo &LMI) -> bool {
+ if (!LMI.LifeStart)
+ return false;
+ if (LMI.SinkLifeStart) {
+ LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+ << "\n");
+ SinkCands.insert(LMI.LifeStart);
+ }
+ if (LMI.HoistLifeEnd) {
+ LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+ HoistCands.insert(LMI.LifeEnd);
+ }
+ return true;
+ };
+
for (BasicBlock &BB : *Func) {
if (Blocks.count(&BB))
continue;
@@ -414,95 +486,52 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
if (!AI)
continue;
- // Find the pair of life time markers for address 'Addr' that are either
- // defined inside the outline region or can legally be shrinkwrapped into
- // the outline region. If there are not other untracked uses of the
- // address, return the pair of markers if found; otherwise return a pair
- // of nullptr.
- auto GetLifeTimeMarkers =
- [&](Instruction *Addr, bool &SinkLifeStart,
- bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
- Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
-
- for (User *U : Addr->users()) {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
- if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
- // Do not handle the case where AI has multiple start markers.
- if (LifeStart)
- return std::make_pair<Instruction *>(nullptr, nullptr);
- LifeStart = IntrInst;
- }
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
- if (LifeEnd)
- return std::make_pair<Instruction *>(nullptr, nullptr);
- LifeEnd = IntrInst;
- }
- continue;
- }
- // Find untracked uses of the address, bail.
- if (!definedInRegion(Blocks, U))
- return std::make_pair<Instruction *>(nullptr, nullptr);
- }
-
- if (!LifeStart || !LifeEnd)
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- SinkLifeStart = !definedInRegion(Blocks, LifeStart);
- HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
- // Do legality Check.
- if ((SinkLifeStart || HoistLifeEnd) &&
- !isLegalToShrinkwrapLifetimeMarkers(Addr))
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- // Check to see if we have a place to do hoisting, if not, bail.
- if (HoistLifeEnd && !ExitBlock)
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- return std::make_pair(LifeStart, LifeEnd);
- };
-
- bool SinkLifeStart = false, HoistLifeEnd = false;
- auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
-
- if (Markers.first) {
- if (SinkLifeStart)
- SinkCands.insert(Markers.first);
+ LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
+ bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+ if (Moved) {
+ LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
SinkCands.insert(AI);
- if (HoistLifeEnd)
- HoistCands.insert(Markers.second);
continue;
}
- // Follow the bitcast.
- Instruction *MarkerAddr = nullptr;
+ // Follow any bitcasts.
+ SmallVector<Instruction *, 2> Bitcasts;
+ SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
for (User *U : AI->users()) {
if (U->stripInBoundsConstantOffsets() == AI) {
- SinkLifeStart = false;
- HoistLifeEnd = false;
Instruction *Bitcast = cast<Instruction>(U);
- Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
- if (Markers.first) {
- MarkerAddr = Bitcast;
+ LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
+ if (LMI.LifeStart) {
+ Bitcasts.push_back(Bitcast);
+ BitcastLifetimeInfo.push_back(LMI);
continue;
}
}
// Found unknown use of AI.
if (!definedInRegion(Blocks, U)) {
- MarkerAddr = nullptr;
+ Bitcasts.clear();
break;
}
}
- if (MarkerAddr) {
- if (SinkLifeStart)
- SinkCands.insert(Markers.first);
- if (!definedInRegion(Blocks, MarkerAddr))
- SinkCands.insert(MarkerAddr);
- SinkCands.insert(AI);
- if (HoistLifeEnd)
- HoistCands.insert(Markers.second);
+ // Either no bitcasts reference the alloca or there are unknown uses.
+ if (Bitcasts.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+ SinkCands.insert(AI);
+ for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+ Instruction *BitcastAddr = Bitcasts[I];
+ const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+ assert(LMI.LifeStart &&
+ "Unsafe to sink bitcast without lifetime markers");
+ moveOrIgnoreLifetimeMarkers(LMI);
+ if (!definedInRegion(Blocks, BitcastAddr)) {
+ LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+ << "\n");
+ SinkCands.insert(BitcastAddr);
+ }
}
}
}
@@ -780,6 +809,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NoBuiltin:
case Attribute::NoCapture:
case Attribute::NoReturn:
+ case Attribute::NoSync:
case Attribute::None:
case Attribute::NonNull:
case Attribute::ReadNone:
@@ -792,8 +822,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StructRet:
case Attribute::SwiftError:
case Attribute::SwiftSelf:
+ case Attribute::WillReturn:
case Attribute::WriteOnly:
case Attribute::ZExt:
+ case Attribute::ImmArg:
case Attribute::EndAttrKinds:
continue;
// Those attributes should be safe to propagate to the extracted function.
@@ -803,6 +835,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::InlineHint:
case Attribute::MinSize:
case Attribute::NoDuplicate:
+ case Attribute::NoFree:
case Attribute::NoImplicitFloat:
case Attribute::NoInline:
case Attribute::NonLazyBind:
@@ -817,6 +850,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::SanitizeMemory:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
+ case Attribute::SanitizeMemTag:
case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
@@ -845,7 +879,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
- RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
+ "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = &*AI++;
@@ -880,6 +915,88 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
return newFunction;
}
+/// Erase lifetime.start markers which reference inputs to the extraction
+/// region, and insert the referenced memory into \p LifetimesStart.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas,
+ SetVector<Value *> &LifetimesStart) {
+ for (BasicBlock *BB : Blocks) {
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ auto *II = dyn_cast<IntrinsicInst>(&*It);
+ ++It;
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifetimesStart.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
+ CallInst *TheCall) {
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ Instruction *Term = TheCall->getParent()->getTerminator();
+
+ // The memory argument to a lifetime marker must be an i8*. Cache any bitcasts
+ // needed to satisfy this requirement so they may be reused.
+ DenseMap<Value *, Value *> Bitcasts;
+
+ // Emit lifetime markers for the pointers given in \p Objects. Insert the
+ // markers before the call if \p InsertBefore, and after the call otherwise.
+ auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+ bool InsertBefore) {
+ for (Value *Mem : Objects) {
+ assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
+ TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *&MemAsI8Ptr = Bitcasts[Mem];
+ if (!MemAsI8Ptr) {
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+ }
+
+ auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+ if (InsertBefore)
+ Marker->insertBefore(TheCall);
+ else
+ Marker->insertBefore(Term);
+ }
+ };
+
+ if (!LifetimesStart.empty()) {
+ auto StartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+ }
+
+ if (!LifetimesEnd.empty()) {
+ auto EndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+ }
+}
+
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
@@ -897,11 +1014,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
- for (Value *input : inputs)
+ unsigned ArgNo = 0;
+ SmallVector<unsigned, 1> SwiftErrorArgs;
+ for (Value *input : inputs) {
if (AggregateArgs)
StructValues.push_back(input);
- else
+ else {
params.push_back(input);
+ if (input->isSwiftError())
+ SwiftErrorArgs.push_back(ArgNo);
+ }
+ ++ArgNo;
+ }
// Create allocas for the outputs
for (Value *output : outputs) {
@@ -957,13 +1081,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
}
codeReplacer->getInstList().push_back(call);
+ // Set swifterror parameter attributes.
+ for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
+ call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ }
+
Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
unsigned FirstOut = inputs.size();
if (!AggregateArgs)
std::advance(OutputArgBegin, inputs.size());
// Reload the outputs passed in by reference.
- Function::arg_iterator OAI = OutputArgBegin;
for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
Value *Output = nullptr;
if (AggregateArgs) {
@@ -977,7 +1106,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
} else {
Output = ReloadOutputs[i];
}
- LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
+ outputs[i]->getName() + ".reload");
Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
@@ -986,40 +1116,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
if (!Blocks.count(inst->getParent()))
inst->replaceUsesOfWith(outputs[i], load);
}
-
- // Store to argument right after the definition of output value.
- auto *OutI = dyn_cast<Instruction>(outputs[i]);
- if (!OutI)
- continue;
-
- // Find proper insertion point.
- BasicBlock::iterator InsertPt;
- // In case OutI is an invoke, we insert the store at the beginning in the
- // 'normal destination' BB. Otherwise we insert the store right after OutI.
- if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
- InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
- else if (auto *Phi = dyn_cast<PHINode>(OutI))
- InsertPt = Phi->getParent()->getFirstInsertionPt();
- else
- InsertPt = std::next(OutI->getIterator());
-
- assert(OAI != newFunction->arg_end() &&
- "Number of output arguments should match "
- "the amount of defined values");
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
- new StoreInst(outputs[i], GEP, &*InsertPt);
- // Since there should be only one struct argument aggregating
- // all the output values, we shouldn't increment OAI, which always
- // points to the struct argument, in this case.
- } else {
- new StoreInst(outputs[i], &*OAI, &*InsertPt);
- ++OAI;
- }
}
// Now we can emit a switch statement using the call as a value.
@@ -1075,6 +1171,50 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
}
}
+ // Store the arguments right after the definition of the output value.
+ // This must be done after creating the exit stubs to ensure that the
+ // invoke result restore is placed in the outlined function.
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ auto *OutI = dyn_cast<Instruction>(outputs[i]);
+ if (!OutI)
+ continue;
+
+ // Find proper insertion point.
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning of the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
+
+ Instruction *InsertBefore = &*InsertPt;
+ assert((InsertBefore->getFunction() == newFunction ||
+ Blocks.count(InsertBefore->getParent())) &&
+ "InsertPt should be in new function");
+ assert(OAI != newFunction->arg_end() &&
+ "Number of output arguments should match "
+ "the amount of defined values");
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+ InsertBefore);
+ new StoreInst(outputs[i], GEP, InsertBefore);
+ // Since there should be only one struct argument aggregating
+ // all the output values, we shouldn't increment OAI, which always
+ // points to the struct argument, in this case.
+ } else {
+ new StoreInst(outputs[i], &*OAI, InsertBefore);
+ ++OAI;
+ }
+ }
+
// Now that we've done the deed, simplify the switch instruction.
Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
switch (NumExitBlocks) {
@@ -1119,6 +1259,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
break;
}
+ // Insert lifetime markers around the reloads of any output values. The
+ // allocas that the output values are stored in are only in use in the
+ // codeRepl block.
+ insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
+
return call;
}
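A minimal hedged sketch (not part of the patch) of what insertLifetimeMarkersSurroundingCall achieves for a single reload slot, written with the generic IRBuilder helpers; Slot and ExtractedCall are illustrative names.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void bracketReloadSlot(AllocaInst *Slot, CallInst *ExtractedCall) {
      IRBuilder<> B(ExtractedCall);
      // Emit @llvm.lifetime.start(i64 -1, i8* %slot) right before the call ...
      B.CreateLifetimeStart(Slot);
      // ... and @llvm.lifetime.end(i64 -1, i8* %slot) right after it, so stack
      // coloring knows the slot is dead outside the codeRepl block.
      B.SetInsertPoint(ExtractedCall->getParent(),
                       std::next(ExtractedCall->getIterator()));
      B.CreateLifetimeEnd(Slot);
    }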
@@ -1133,6 +1277,13 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
// Insert this basic block into the new function
newBlocks.push_back(Block);
+
+ // Remove @llvm.assume calls that were moved to the new function from the
+ // old function's assumption cache.
+ if (AC)
+ for (auto &I : *Block)
+ if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+ AC->unregisterAssumption(cast<CallInst>(&I));
}
}
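A hedged sketch of the AssumptionCache bookkeeping added above, with illustrative names: any @llvm.assume moved into the outlined function has to be dropped from the old function's cache, otherwise later queries against that cache would see stale assumptions.

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static void dropMovedAssumptions(AssumptionCache &AC, BasicBlock &MovedBB) {
      for (Instruction &I : MovedBB)
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          if (II->getIntrinsicID() == Intrinsic::assume)
            AC.unregisterAssumption(II); // IntrinsicInst is-a CallInst
    }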
@@ -1181,71 +1332,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
-/// Scan the extraction region for lifetime markers which reference inputs.
-/// Erase these markers. Return the inputs which were referenced.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static SetVector<Value *>
-eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
- const SetVector<Value *> &SunkAllocas) {
- SetVector<Value *> InputObjectsWithLifetime;
- for (BasicBlock *BB : Blocks) {
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- auto *II = dyn_cast<IntrinsicInst>(&*It);
- ++It;
- if (!II || !II->isLifetimeStartOrEnd())
- continue;
-
- // Get the memory operand of the lifetime marker. If the underlying
- // object is a sunk alloca, or is otherwise defined in the extraction
- // region, the lifetime marker must not be erased.
- Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
- if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
- continue;
-
- InputObjectsWithLifetime.insert(Mem);
- II->eraseFromParent();
- }
- }
- return InputObjectsWithLifetime;
-}
-
-/// Insert lifetime start/end markers surrounding the call to the new function
-/// for objects defined in the caller.
-static void insertLifetimeMarkersSurroundingCall(
- Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
- CallInst *TheCall) {
- if (InputObjectsWithLifetime.empty())
- return;
-
- LLVMContext &Ctx = M->getContext();
- auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
- auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
- auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
- auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
- for (Value *Mem : InputObjectsWithLifetime) {
- assert((!isa<Instruction>(Mem) ||
- cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
- "Input memory not defined in original function");
- Value *MemAsI8Ptr = nullptr;
- if (Mem->getType() == Int8PtrTy)
- MemAsI8Ptr = Mem;
- else
- MemAsI8Ptr =
- CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
-
- auto StartMarker =
- CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
- StartMarker->insertBefore(TheCall);
- auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
- EndMarker->insertAfter(TheCall);
- }
-}
-
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -1348,10 +1434,24 @@ Function *CodeExtractor::extractCodeRegion() {
// Find inputs to, outputs from the code region.
findInputsOutputs(inputs, outputs, SinkingCands);
- // Now sink all instructions which only have non-phi uses inside the region
- for (auto *II : SinkingCands)
- cast<Instruction>(II)->moveBefore(*newFuncRoot,
- newFuncRoot->getFirstInsertionPt());
+ // Now sink all instructions which only have non-phi uses inside the region.
+ // Group the allocas at the start of the block, so that any bitcast uses of
+ // the allocas are well-defined.
+ AllocaInst *FirstSunkAlloca = nullptr;
+ for (auto *II : SinkingCands) {
+ if (auto *AI = dyn_cast<AllocaInst>(II)) {
+ AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+ if (!FirstSunkAlloca)
+ FirstSunkAlloca = AI;
+ }
+ }
+ assert((SinkingCands.empty() || FirstSunkAlloca) &&
+ "Did not expect a sink candidate without any allocas");
+ for (auto *II : SinkingCands) {
+ if (!isa<AllocaInst>(II)) {
+ cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+ }
+ }
if (!HoistingCands.empty()) {
auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
@@ -1361,11 +1461,11 @@ Function *CodeExtractor::extractCodeRegion() {
}
// Collect objects which are inputs to the extraction region and also
- // referenced by lifetime start/end markers within it. The effects of these
+ // referenced by lifetime start markers within it. The effects of these
// markers must be replicated in the calling function to prevent the stack
// coloring pass from merging slots which store input objects.
- ValueSet InputObjectsWithLifetime =
- eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
+ ValueSet LifetimesStart;
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
// Construct new function based on inputs/outputs & add allocas for all defs.
Function *newFunction =
@@ -1388,8 +1488,8 @@ Function *CodeExtractor::extractCodeRegion() {
// Replicate the effects of any lifetime start/end markers which referenced
// input objects in the extraction region by placing markers around the call.
- insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
- InputObjectsWithLifetime, TheCall);
+ insertLifetimeMarkersSurroundingCall(
+ oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())
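For context, a hedged usage sketch of the interface these CodeExtractor hunks modify (not from the patch): outline a single-entry region and let extractCodeRegion() rewrite the caller. The constructor parameter list below is written from memory of this tree and should be treated as illustrative.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/CodeExtractor.h"
    using namespace llvm;

    static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                                   DominatorTree &DT, AssumptionCache *AC) {
      CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false,
                       /*BFI=*/nullptr, /*BPI=*/nullptr, AC);
      // isEligible() rejects regions the extractor cannot handle;
      // extractCodeRegion() builds the new function, the replacement call, and
      // the lifetime markers shown in the hunks above.
      return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
    }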
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 4e7da7d0449f..069a86f6ab33 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -1,9 +1,8 @@
//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 975b363859a9..5f53d794fe8a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -1,9 +1,8 @@
//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -73,7 +72,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Value *&V = Loads[PN->getIncomingBlock(i)];
if (!V) {
// Insert the load into the predecessor block
- V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
}
PN->setIncomingValue(i, V);
@@ -81,7 +81,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
} else {
// If this is a normal instruction, just insert a load.
- Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads, U);
U->replaceUsesOfWith(&I, V);
}
}
@@ -142,7 +143,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
- Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
+ Value *V =
+ new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
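The LoadInst changes in this file, and the one in CodeExtractor above, all follow the same migration: the loaded type is now passed explicitly instead of being derived from the pointer operand's pointee type. A minimal hedged sketch with illustrative names:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static LoadInst *reloadFromSlot(Instruction &I, AllocaInst *Slot,
                                    Instruction *InsertBefore) {
      // Old form:  new LoadInst(Slot, I.getName() + ".reload", InsertBefore);
      // New form passes the result type up front:
      return new LoadInst(I.getType(), Slot, I.getName() + ".reload",
                          /*isVolatile=*/false, InsertBefore);
    }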
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 569ea58a3047..4aa40eeadda4 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -1,9 +1,8 @@
//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,7 +30,7 @@ static void insertCall(Function &CurFn, StringRef Func,
Func == "__mcount" ||
Func == "_mcount" ||
Func == "__cyg_profile_func_enter_bare") {
- Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
Call->setDebugLoc(DL);
return;
@@ -40,7 +39,7 @@ static void insertCall(Function &CurFn, StringRef Func,
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
- Constant *Fn = M.getOrInsertFunction(
+ FunctionCallee Fn = M.getOrInsertFunction(
Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
Instruction *RetAddr = CallInst::Create(
diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp
index 762a374c135c..914babeb6829 100644
--- a/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -1,9 +1,8 @@
//===- EscapeEnumerator.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,7 @@
#include "llvm/IR/Module.h"
using namespace llvm;
-static Constant *getDefaultPersonalityFn(Module *M) {
+static FunctionCallee getDefaultPersonalityFn(Module *M) {
LLVMContext &C = M->getContext();
Triple T(M->getTargetTriple());
EHPersonality Pers = getDefaultEHPersonality(T);
@@ -69,8 +68,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
- Constant *PersFn = getDefaultPersonalityFn(F.getParent());
- F.setPersonalityFn(PersFn);
+ FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
+ F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
}
if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
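Both files above are part of the getOrInsertFunction() migration: it now returns a FunctionCallee (a {FunctionType*, Value*} pair) rather than a bare Constant*. A hedged sketch of the two patterns, using a hypothetical hook name:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void emitVoidHook(Module &M, Instruction *InsertPt) {
      LLVMContext &C = M.getContext();
      FunctionCallee Fn =
          M.getOrInsertFunction("__my_hook", Type::getVoidTy(C)); // hypothetical
      CallInst::Create(Fn, "", InsertPt); // call creation takes FunctionCallee
      // Where a raw Constant* is still required (e.g. setPersonalityFn):
      //   F.setPersonalityFn(cast<Constant>(Fn.getCallee()));
    }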
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index e875cd686b00..0e203f4e075d 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -1,9 +1,8 @@
//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -175,6 +174,34 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
+/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's
+/// type and walk down through the initial elements to obtain additional
+/// pointers to try. Returns the first non-null return value from Func, or
+/// nullptr if the type can't be introspected further.
+static Constant *
+evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ std::function<Constant *(Constant *)> Func) {
+ Constant *Val;
+ while (!(Val = Func(Ptr))) {
+ // If Ty is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+ if (!isa<StructType>(Ty))
+ break;
+
+ IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant *const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
+ Ptr = FoldedPtr;
+ }
+ return Val;
+}
+
static Constant *getInitializer(Constant *C) {
auto *GV = dyn_cast<GlobalVariable>(C);
return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
@@ -185,8 +212,14 @@ static Constant *getInitializer(Constant *C) {
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
- if (I != MutatedMemory.end()) return I->second;
+ auto findMemLoc = [this](Constant *Ptr) {
+ DenseMap<Constant *, Constant *>::const_iterator I =
+ MutatedMemory.find(Ptr);
+ return I != MutatedMemory.end() ? I->second : nullptr;
+ };
+
+ if (Constant *Val = findMemLoc(P))
+ return Val;
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
@@ -204,13 +237,17 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
break;
// Handle a constantexpr bitcast.
case Instruction::BitCast:
- Constant *Val = getVal(CE->getOperand(0));
- auto MM = MutatedMemory.find(Val);
- auto *I = (MM != MutatedMemory.end()) ? MM->second
- : getInitializer(CE->getOperand(0));
- if (I)
+ // We're evaluating a load through a pointer that was bitcast to a
+ // different type. See if the "from" pointer has recently been stored.
+ // If it hasn't, we may still be able to find a stored pointer by
+ // introspecting the type.
+ Constant *Val =
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc);
+ if (!Val)
+ Val = getInitializer(CE->getOperand(0));
+ if (Val)
return ConstantFoldLoadThroughBitcast(
- I, P->getType()->getPointerElementType(), DL);
+ Val, P->getType()->getPointerElementType(), DL);
break;
}
}
@@ -330,37 +367,26 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
<< "Attempting to resolve bitcast on constant ptr.\n");
// If we're evaluating a store through a bitcast, then we need
// to pull the bitcast off the pointer type and push it onto the
- // stored value.
- Ptr = CE->getOperand(0);
-
- Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-
- // In order to push the bitcast onto the stored value, a bitcast
- // from NewTy to Val's type must be legal. If it's not, we can try
- // introspecting NewTy to find a legal conversion.
- Constant *NewVal;
- while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) {
- // If NewTy is a struct, we can convert the pointer to the struct
- // into a pointer to its first member.
- // FIXME: This could be extended to support arrays as well.
- if (StructType *STy = dyn_cast<StructType>(NewTy)) {
- NewTy = STy->getTypeAtIndex(0U);
-
- IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
- Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
- Constant * const IdxList[] = {IdxZero, IdxZero};
-
- Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
- Ptr = FoldedPtr;
-
- // If we can't improve the situation by introspecting NewTy,
- // we have to give up.
- } else {
- LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
- return false;
+ // stored value. In order to push the bitcast onto the stored value,
+ // a bitcast from the pointer's element type to Val's type must be
+ // legal. If it's not, we can try introspecting the type to find a
+ // legal conversion.
+
+ auto castValTy = [&](Constant *P) -> Constant * {
+ Type *Ty = cast<PointerType>(P->getType())->getElementType();
+ if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) {
+ Ptr = P;
+ return FV;
}
+ return nullptr;
+ };
+
+ Constant *NewVal =
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy);
+ if (!NewVal) {
+ LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
}
Val = NewVal;
@@ -541,7 +567,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
+ if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()),
+ Callee, Formals, TLI)) {
InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
if (!InstResult)
return false;
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index d9778f4a1fb7..0c52e6f3703b 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -1,9 +1,8 @@
//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index a717d9b72819..a9b28754c8e9 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -1,9 +1,8 @@
//===- FunctionComparator.h - Function Comparator -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,6 +113,19 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
for (; LI != LE && RI != RE; ++LI, ++RI) {
Attribute LA = *LI;
Attribute RA = *RI;
+ if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
+ if (LA.getKindAsEnum() != RA.getKindAsEnum())
+ return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());
+
+ Type *TyL = LA.getValueAsType();
+ Type *TyR = RA.getValueAsType();
+ if (TyL && TyR)
+ return cmpTypes(TyL, TyR);
+
+ // Two pointers, at least one null, so the comparison result is
+ // independent of the value of a real pointer.
+ return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
+ }
if (LA < RA)
return -1;
if (RA < LA)
@@ -557,31 +569,20 @@ int FunctionComparator::cmpOperations(const Instruction *L,
}
if (const CmpInst *CI = dyn_cast<CmpInst>(L))
return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
- if (const CallInst *CI = dyn_cast<CallInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
- cast<CallInst>(R)->getCallingConv()))
+ if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
+ auto CSR = CallSite(const_cast<Instruction *>(R));
+ if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
return Res;
- if (int Res =
- cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
return Res;
- if (int Res = cmpOperandBundlesSchema(CI, R))
- return Res;
- return cmpRangeMetadata(
- CI->getMetadata(LLVMContext::MD_range),
- cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(II->getCallingConv(),
- cast<InvokeInst>(R)->getCallingConv()))
+ if (int Res = cmpOperandBundlesSchema(L, R))
return Res;
- if (int Res =
- cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
- return Res;
- if (int Res = cmpOperandBundlesSchema(II, R))
- return Res;
- return cmpRangeMetadata(
- II->getMetadata(LLVMContext::MD_range),
- cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ if (const CallInst *CI = dyn_cast<CallInst>(L))
+ if (int Res = cmpNumbers(CI->getTailCallKind(),
+ cast<CallInst>(R)->getTailCallKind()))
+ return Res;
+ return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
+ R->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index a9772e31da50..c9cc0990f237 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -1,9 +1,8 @@
//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -130,7 +129,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// definitions upon import, so that they are available for inlining
// and/or optimization, but are turned into declarations later
// during the EliminateAvailableExternally pass.
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
// An imported external declaration stays external.
return SGV->getLinkage();
@@ -159,7 +158,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// equivalent, so the issue described above for weak_any does not exist,
// and the definition can be imported. It can be treated similarly
// to an imported externally visible global value.
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
@@ -177,7 +176,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// If we are promoting the local to global scope, it is handled
// similarly to a normal externally visible global.
if (DoPromote) {
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
@@ -230,11 +229,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
- // Mark read-only variables which can be imported with specific attribute.
- // We can't internalize them now because IRMover will fail to link variable
- // definitions to their external declarations during ThinLTO import. We'll
- // internalize read-only variables later, after import is finished.
- // See internalizeImmutableGVs.
+ // Mark read/write-only variables which can be imported with a specific
+ // attribute. We can't internalize them now because IRMover will fail
+ // to link variable definitions to their external declarations during
+ // ThinLTO import. We'll internalize read-only variables later, after
+ // import is finished. See internalizeGVsAfterImport.
//
// If global value dead stripping is not enabled in summary then
// propagateConstants hasn't been run. We can't internalize GV
@@ -242,13 +241,16 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
const auto &SL = VI.getSummaryList();
auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
- if (GVS && GVS->isReadOnly())
+ // At this stage the "maybe" read-only/write-only flags are known to be definite.
+ if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
}
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+ // Save the original name string before we rename GV below.
+ auto Name = GV.getName().str();
// Once we change the name or linkage it is difficult to determine
// again whether we should promote since shouldPromoteLocalToGlobal needs
// to locate the summary (based on GUID from name and linkage). Therefore,
@@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
GV.setLinkage(getLinkage(&GV, DoPromote));
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+ // for later renaming as well. This is required for COFF.
+ if (const auto *C = GV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
} else
GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
@@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
processGlobalForThinLTO(SF);
for (GlobalAlias &GA : M.aliases())
processGlobalForThinLTO(GA);
+
+ // Replace any COMDATS that required renaming (because the COMDAT leader was
+ // promoted and renamed).
+ if (!RenamedComdats.empty())
+ for (auto &GO : M.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
}
bool FunctionImportGlobalProcessing::run() {
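A hedged sketch of the COMDAT fix-up these hunks add, with illustrative names: when a promoted local was the COMDAT leader, COFF needs the COMDAT itself renamed to match the new symbol, and every member must then be re-pointed at the renamed COMDAT.

    #include "llvm/IR/Comdat.h"
    #include "llvm/IR/GlobalObject.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void retargetComdat(Module &M, GlobalObject &GO,
                               StringRef OldLeaderName) {
      const Comdat *Old = GO.getComdat();
      if (!Old || Old->getName() != OldLeaderName)
        return;
      Comdat *New = M.getOrInsertComdat(GO.getName()); // named after new symbol
      New->setSelectionKind(Old->getSelectionKind());
      GO.setComdat(New);
    }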
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index ff6970db47da..a2942869130d 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -1,9 +1,8 @@
//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/GuardUtils.cpp b/lib/Transforms/Utils/GuardUtils.cpp
index 08de0a4c53e9..34c32d9c0c98 100644
--- a/lib/Transforms/Utils/GuardUtils.cpp
+++ b/lib/Transforms/Utils/GuardUtils.cpp
@@ -1,9 +1,8 @@
//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform transformations related to guards and their
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 02482c550321..8041e66e6c4c 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -1,9 +1,8 @@
//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Generating inliner statistics for imported functions, mostly useful for
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 623fe91a5a60..a7f0f7ac5d61 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1,9 +1,8 @@
//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,16 +84,10 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
-}
-
-llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR,
bool InsertLifetime) {
- return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
+ return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
}
namespace {
@@ -1042,11 +1035,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
- SmallVector<Value *, 4> Objects;
- GetUnderlyingObjects(const_cast<Value*>(V),
- Objects, DL, /* LI = */ nullptr);
+ SmallVector<const Value *, 4> Objects;
+ GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
- for (Value *O : Objects)
+ for (const Value *O : Objects)
ObjSet.insert(O);
}
@@ -1216,14 +1208,14 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
- Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ auto *NewCall = dyn_cast<CallBase>(VMI->second);
if (!NewCall)
continue;
// We do not treat intrinsic calls like real function calls because we
// expect them to become inline code; do not add an edge for an intrinsic.
- CallSite CS = CallSite(NewCall);
- if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ if (NewCall->getCalledFunction() &&
+ NewCall->getCalledFunction()->isIntrinsic())
continue;
// Remember that this call site got inlined for the client of
@@ -1236,19 +1228,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// destination. This can also happen if the call graph node of the caller
// was just unnecessarily imprecise.
if (!I->second->getFunction())
- if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ if (Function *F = NewCall->getCalledFunction()) {
// Indirect call site resolved to direct call.
- CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+ CallerNode->addCalledFunction(NewCall, CG[F]);
continue;
}
- CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ CallerNode->addCalledFunction(NewCall, I->second);
}
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
- CallerNode->removeCallEdgeFor(CS);
+ CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
}
static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
@@ -1353,6 +1345,44 @@ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
}
+/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
+/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
+static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
+ LLVMContext &Ctx,
+ DenseMap<const MDNode *, MDNode *> &IANodes) {
+ auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
+ return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
+ IA);
+}
+
+/// Returns the LoopID for a loop which has has been cloned from another
+/// function for inlining with the new inlined-at start and end locs.
+static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
+ LLVMContext &Ctx,
+ DenseMap<const MDNode *, MDNode *> &IANodes) {
+ assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
+ "Loop ID needs at least one operand");
+ assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
+ "Loop ID should refer to itself");
+
+ // Save space for the self-referential LoopID.
+ SmallVector<Metadata *, 4> MDs = {nullptr};
+
+ for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
+ Metadata *MD = OrigLoopId->getOperand(i);
+ // Update the DILocations to encode the inlined-at metadata.
+ if (DILocation *DL = dyn_cast<DILocation>(MD))
+ MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
+ else
+ MDs.push_back(MD);
+ }
+
+ MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
+ // Insert the self-referential LoopID.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ return NewLoopID;
+}
+
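The helper above re-creates the self-referential !llvm.loop idiom for the inlined clone. A hedged sketch of that idiom in isolation (Ctx and ExtraOp are illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static MDNode *makeLoopID(LLVMContext &Ctx, Metadata *ExtraOp) {
      SmallVector<Metadata *, 2> Ops = {nullptr, ExtraOp}; // slot 0 reserved
      MDNode *LoopID = MDNode::getDistinct(Ctx, Ops);
      LoopID->replaceOperandWith(0, LoopID); // operand 0 refers to the node itself
      return LoopID;
    }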
/// Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1378,10 +1408,17 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
+ // Loop metadata needs to be updated so that the start and end locs
+ // reference inlined-at locations.
+ if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
+ MDNode *NewLoopID =
+ inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
+ BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
+ }
+
if (DebugLoc DL = BI->getDebugLoc()) {
- auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
- IANodes);
- auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
BI->setDebugLoc(IDL);
continue;
}
@@ -1448,47 +1485,45 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
CalleeEntryCount.getCount() < 1)
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
- uint64_t CallCount =
+ int64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
CalleeEntryCount.getCount());
-
- for (auto const &Entry : VMap)
- if (isa<CallInst>(Entry.first))
- if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
- CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());
- for (BasicBlock &BB : *Callee)
- // No need to update the callsite if it is pruned during inlining.
- if (VMap.count(&BB))
- for (Instruction &I : BB)
- if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount,
- CalleeEntryCount.getCount());
+ updateProfileCallee(Callee, -CallCount, &VMap);
}
-/// Update the entry count of callee after inlining.
-///
-/// The callsite's block count is subtracted from the callee's function entry
-/// count.
-static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
- Instruction *CallInst, Function *Callee,
- ProfileSummaryInfo *PSI) {
- // If the callee has a original count of N, and the estimated count of
- // callsite is M, the new callee count is set to N - M. M is estimated from
- // the caller's entry count, its entry block frequency and the block frequency
- // of the callsite.
+void llvm::updateProfileCallee(
+ Function *Callee, int64_t entryDelta,
+ const ValueMap<const Value *, WeakTrackingVH> *VMap) {
auto CalleeCount = Callee->getEntryCount();
- if (!CalleeCount.hasValue() || !PSI)
- return;
- auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);
- if (!CallCount.hasValue())
+ if (!CalleeCount.hasValue())
return;
+
+ uint64_t priorEntryCount = CalleeCount.getCount();
+ uint64_t newEntryCount;
+
// Since CallSiteCount is an estimate, it could exceed the original callee
- // count and has to be set to 0.
- if (CallCount.getValue() > CalleeCount.getCount())
- CalleeCount.setCount(0);
+ // count and has to be set to 0, so guard against underflow.
+ if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
+ newEntryCount = 0;
else
- CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue());
- Callee->setEntryCount(CalleeCount);
+ newEntryCount = priorEntryCount + entryDelta;
+
+ Callee->setEntryCount(newEntryCount);
+
+ // During inlining, also scale the profile weights of the call sites that
+ // were cloned into the caller.
+ if (VMap) {
+ uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
+ for (auto const &Entry : *VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(cloneEntryCount, priorEntryCount);
+ }
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (!VMap || VMap->count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(newEntryCount, priorEntryCount);
}
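A hedged worked example of the new updateProfileCallee() contract, with illustrative numbers: if the callee's entry count is 1000 and the inlined call site is estimated at 400, the inliner passes entryDelta = -400, the callee keeps an entry count of 600, and the call sites cloned into the caller are scaled by 400/1000. A negative delta larger than the current entry count clamps to zero instead of wrapping, which is what the guard above implements.

    // During inlining (illustrative values):
    updateProfileCallee(Callee, /*entryDelta=*/-400, &VMap);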
/// This function inlines the called function into the basic block of the
@@ -1507,6 +1542,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
+ // FIXME: we don't inline callbr yet.
+ if (isa<CallBrInst>(TheCall))
+ return false;
+
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
@@ -1684,8 +1723,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
IFI.PSI, IFI.CallerBFI);
- // Update the profile count of callee.
- updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
@@ -1734,6 +1771,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Instruction *NewI = nullptr;
if (isa<CallInst>(I))
NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else if (isa<CallBrInst>(I))
+ NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
else
NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
@@ -1817,8 +1856,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Move any dbg.declares describing the allocas into the entry basic block.
DIBuilder DIB(*Caller->getParent());
for (auto &AI : IFI.StaticAllocas)
- replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0,
- DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
}
SmallVector<Value*,4> VarArgsToForward;
@@ -1869,10 +1907,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add VarArgs to existing parameters.
SmallVector<Value *, 6> Params(CI->arg_operands());
Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
- CallInst *NewCI =
- CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction()
- : CI->getCalledValue(),
- Params, "", CI);
+ CallInst *NewCI = CallInst::Create(
+ CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
NewCI->setDebugLoc(CI->getDebugLoc());
NewCI->setAttributes(Attrs);
NewCI->setCallingConv(CI->getCallingConv());
@@ -2038,6 +2074,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Instruction *NewInst;
if (CS.isCall())
NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+ else if (CS.isCallBr())
+ NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
else
NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
NewInst->takeName(I);
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 003721f2b939..6c4fc1ceb991 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -1,9 +1,8 @@
//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 4a359b99bebd..9082049c82da 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -1,9 +1,8 @@
//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 53d444b309d5..29e7c5260f46 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -1,9 +1,8 @@
//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,11 +31,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -198,6 +199,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
continue;
}
+ // If we added a single PHI, it must dominate all uses and we can directly
+ // rename it.
+ if (AddedPHIs.size() == 1) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ // We might call ValueIsRAUWd multiple times for the same value.
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
+ UseToRewrite->set(AddedPHIs[0]);
+ continue;
+ }
+
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
}
@@ -211,9 +223,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
BasicBlock *UserBB = DVI->getParent();
if (InstBB == UserBB || L->contains(UserBB))
continue;
- // We currently only handle debug values residing in blocks where we have
- // inserted a PHI instruction.
- if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
+ // We currently only handle debug values residing in blocks that were
+ // traversed while rewriting the uses. If we inserted just a single PHI,
+ // we will handle all relevant debug values.
+ Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+ : SSAUpdate.FindValueForBlock(UserBB);
+ if (V)
DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
}
@@ -306,6 +321,12 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
+#ifdef EXPENSIVE_CHECKS
+ // Verify all sub-loops are in LCSSA form already.
+ for (Loop *SubLoop: L)
+ assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+#endif
+
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
if (ExitBlocks.empty())
@@ -325,6 +346,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, put them into the worklist to rewrite those uses.
for (BasicBlock *BB : BlocksDominatingExits) {
+ // Skip blocks that are part of any sub-loops, they must be in LCSSA
+ // already.
+ if (LI->getLoopFor(BB) != &L)
+ continue;
for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
@@ -419,6 +444,8 @@ struct LCSSAWrapperPass : public FunctionPass {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
// This is needed to perform LCSSA verification inside LPPassManager
AU.addRequired<LCSSAVerificationPass>();
@@ -462,5 +489,9 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
+ // BPI maps terminators to probabilities; since we don't modify the CFG, no
+ // updates are needed to preserve it.
+ PA.preserve<BranchProbabilityAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index e1592c867636..8c67d1dc6eb3 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -1,9 +1,8 @@
//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 499e611acb57..39b6b889f91c 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1,9 +1,8 @@
//===- Local.cpp - Functions to perform local transformations -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -49,7 +49,6 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -92,6 +91,10 @@ using namespace llvm::PatternMatch;
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+// Max recursion depth for collectBitParts used when detecting bswap and
+// bitreverse idioms
+static const unsigned BitPartRecursionMaxDepth = 64;
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -129,7 +132,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(Destination);
BI->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, OldDest);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
return true;
}
@@ -205,7 +208,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
i = SI->removeCase(i);
e = SI->case_end();
if (DTU)
- DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, ParentBB, DefaultDest}});
continue;
}
@@ -253,7 +257,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
@@ -331,7 +335,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
}
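A hedged sketch of the DomTreeUpdater API this file now relies on: permissive updates tolerate duplicates and no-op edges, replacing both deleteEdgeRelaxed() and the ForceRemoveDuplicates flag. DT, PDT, BB and OldSucc are illustrative.

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/PostDominatorTree.h"
    using namespace llvm;

    static void removeDeadEdge(DominatorTree &DT, PostDominatorTree &PDT,
                               BasicBlock *BB, BasicBlock *OldSucc) {
      DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
      DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, OldSucc}});
      // With the lazy strategy the queued update is applied on the next query
      // or on an explicit DTU.flush().
    }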
@@ -416,8 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
return C->isNullValue() || isa<UndefValue>(C);
- if (CallSite CS = CallSite(I))
- if (isMathLibCallNoop(CS, TLI))
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (isMathLibCallNoop(Call, TLI))
return true;
return false;
@@ -430,7 +434,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
+ if (!I || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
@@ -665,7 +669,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
if (DTU)
- DTU->deleteEdgeRelaxed(Pred, BB);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
@@ -734,7 +738,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
isa<UnreachableInst>(PredBB->getTerminator()) &&
"The successor list of PredBB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(PredBB);
// Recalculation of DomTree is needed when updating a forward DomTree and
// the Entry BB is replaced.
@@ -997,6 +1001,18 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
}
}
+ // We cannot fold the block if it's a branch to an already present callbr
+ // successor because that creates duplicate successors.
+ for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) {
+ if (Succ == CBI->getDefaultDest())
+ return false;
+ for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
+ if (Succ == CBI->getIndirectDest(i))
+ return false;
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
SmallVector<DominatorTree::UpdateType, 32> Updates;
@@ -1064,7 +1080,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
"applying corresponding DTU updates.");
if (DTU) {
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
@@ -1272,6 +1288,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
return false;
}
+/// Produce a DebugLoc to use for each dbg.declare/inst pair that is promoted
+/// to a dbg.value. Because no machine insts can come from debug intrinsics,
+/// only the scope and inlinedAt are significant. Zero line numbers are used
+/// in case this DebugLoc leaks into any adjacent instructions.
+static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
+ // Original dbg.declare must have a location.
+ DebugLoc DeclareLoc = DII->getDebugLoc();
+ MDNode *Scope = DeclareLoc.getScope();
+ DILocation *InlinedAt = DeclareLoc.getInlinedAt();
+ // Produce an unknown location with the correct scope / inlinedAt fields.
+ return DebugLoc::get(0, 0, Scope, InlinedAt);
+}
+
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
@@ -1280,9 +1309,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
auto *DIVar = DII->getVariable();
assert(DIVar && "Missing variable");
auto *DIExpr = DII->getExpression();
- Value *DV = SI->getOperand(0);
+ Value *DV = SI->getValueOperand();
+
+ DebugLoc NewLoc = getDebugValueLoc(DII, SI);
- if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) {
+ if (!valueCoversEntireFragment(DV->getType(), DII)) {
// FIXME: If storing to a part of the variable described by the dbg.declare,
// then we want to insert a dbg.value for the corresponding fragment.
LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
@@ -1292,14 +1323,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
- SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
return;
}
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
- SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1322,12 +1351,14 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
return;
}
+ DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
// We are now tracking the loaded value instead of the address. In the
// future if multi-location support is added to the IR, it might be
// preferable to keep tracking both the loaded value and the original
// address in case the alloca can not be elided.
Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr);
+ LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
DbgValue->insertAfter(LI);
}
@@ -1354,12 +1385,13 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
BasicBlock *BB = APN->getParent();
auto InsertionPt = BB->getFirstInsertionPt();
+ DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
// The block may be a catchswitch block, which does not have a valid
// insertion point.
// FIXME: Insert dbg.value markers in the successors when appropriate.
if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(),
- &*InsertionPt);
+ Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
}
/// Determine whether this alloca is either a VLA or an array.
@@ -1414,10 +1446,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that takes a
// pointer to the variable. Insert a *value* intrinsic that describes
// the variable by dereferencing the alloca.
+ DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
auto *DerefExpr =
DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
- DDI->getDebugLoc(), CI);
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
+ CI);
}
}
DDI->eraseFromParent();
@@ -1519,14 +1552,14 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
- bool DerefBefore, int Offset, bool DerefAfter) {
+ uint8_t DIExprFlags, int Offset) {
auto DbgAddrs = FindDbgAddrUses(Address);
for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
- DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter);
+ DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
// Insert llvm.dbg.declare immediately before InsertBefore, and remove old
// llvm.dbg.declare.
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
@@ -1538,10 +1571,10 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool DerefBefore,
- int Offset, bool DerefAfter) {
+ DIBuilder &Builder, uint8_t DIExprFlags,
+ int Offset) {
return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
- DerefBefore, Offset, DerefAfter);
+ DIExprFlags, Offset);
}
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
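A hedged caller sketch for the new flag-based signature; AI, NewAI, DIB and Offset are placeholders, and the enumerator names (DIExpression::ApplyOffset, DIExpression::DerefBefore) are assumed to be the flags this uint8_t encodes rather than taken from this patch.

    // Old form:
    //   replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*DerefBefore=*/true, Offset,
    //                              /*DerefAfter=*/false);
    // New form: the two booleans collapse into a single flags byte.
    replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::DerefBefore, Offset);
    // Offset-only rewrite, no dereference:
    replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, Offset);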
@@ -1594,120 +1627,119 @@ bool llvm::salvageDebugInfo(Instruction &I) {
if (DbgUsers.empty())
return false;
- auto &M = *I.getModule();
- auto &DL = M.getDataLayout();
+ return salvageDebugInfoForDbgValues(I, DbgUsers);
+}
+
+bool llvm::salvageDebugInfoForDbgValues(
+ Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
auto &Ctx = I.getContext();
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
- auto *DIExpr = DII->getExpression();
- if (!Ops.empty()) {
- // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
- // are implicitly pointing out the value as a DWARF memory location
- // description.
- bool WithStackValue = isa<DbgValueInst>(DII);
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
- }
+ for (auto *DII : DbgUsers) {
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+    // implicitly describe the value as a DWARF memory location description.
+ bool StackValue = isa<DbgValueInst>(DII);
+
+ DIExpression *DIExpr =
+ salvageDebugInfoImpl(I, DII->getExpression(), StackValue);
+
+    // salvageDebugInfoImpl should either fail when examining the first
+    // element of DbgUsers, or not fail at all.
+ if (!DIExpr)
+ return false;
+
DII->setOperand(0, wrapMD(I.getOperand(0)));
DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ }
+
+ return true;
+}
+
+DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
+ DIExpression *SrcDIExpr,
+ bool WithStackValue) {
+ auto &M = *I.getModule();
+ auto &DL = M.getDataLayout();
+
+ // Apply a vector of opcodes to the source DIExpression.
+ auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
+ DIExpression *DIExpr = SrcDIExpr;
+ if (!Ops.empty()) {
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ }
+ return DIExpr;
};
- auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
+ // Apply the given offset to the source DIExpression.
+ auto applyOffset = [&](uint64_t Offset) -> DIExpression * {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
- doSalvage(DII, Ops);
+ return doSalvage(Ops);
};
- auto applyOps = [&](DbgVariableIntrinsic *DII,
- std::initializer_list<uint64_t> Opcodes) {
+  // Initializer-list helper for applying operators to the source DIExpression.
+ auto applyOps =
+ [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
SmallVector<uint64_t, 8> Ops(Opcodes);
- doSalvage(DII, Ops);
+ return doSalvage(Ops);
};
if (auto *CI = dyn_cast<CastInst>(&I)) {
- if (!CI->isNoopCast(DL))
- return false;
-
- // No-op casts are irrelevant for debug info.
- MetadataAsValue *CastSrc = wrapMD(I.getOperand(0));
- for (auto *DII : DbgUsers) {
- DII->setOperand(0, CastSrc);
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- }
- return true;
+ // No-op casts and zexts are irrelevant for debug info.
+ if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
+ return SrcDIExpr;
+ return nullptr;
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
unsigned BitWidth =
M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
- // Rewrite a constant GEP into a DIExpression. Since we are performing
- // arithmetic to compute the variable's *value* in the DIExpression, we
- // need to mark the expression with a DW_OP_stack_value.
+ // Rewrite a constant GEP into a DIExpression.
APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
- for (auto *DII : DbgUsers)
- applyOffset(DII, Offset.getSExtValue());
- return true;
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+ return applyOffset(Offset.getSExtValue());
+ } else {
+ return nullptr;
+ }
} else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
// Rewrite binary operations with constant integer operands.
auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
if (!ConstInt || ConstInt->getBitWidth() > 64)
- return false;
+ return nullptr;
uint64_t Val = ConstInt->getSExtValue();
- for (auto *DII : DbgUsers) {
- switch (BI->getOpcode()) {
- case Instruction::Add:
- applyOffset(DII, Val);
- break;
- case Instruction::Sub:
- applyOffset(DII, -int64_t(Val));
- break;
- case Instruction::Mul:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
- break;
- case Instruction::SDiv:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
- break;
- case Instruction::SRem:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
- break;
- case Instruction::Or:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
- break;
- case Instruction::And:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
- break;
- case Instruction::Xor:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
- break;
- case Instruction::Shl:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
- break;
- case Instruction::LShr:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
- break;
- case Instruction::AShr:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
- break;
- default:
- // TODO: Salvage constants from each kind of binop we know about.
- return false;
- }
+ switch (BI->getOpcode()) {
+ case Instruction::Add:
+ return applyOffset(Val);
+ case Instruction::Sub:
+ return applyOffset(-int64_t(Val));
+ case Instruction::Mul:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
+ case Instruction::SDiv:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
+ case Instruction::SRem:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
+ case Instruction::Or:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
+ case Instruction::And:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
+ case Instruction::Xor:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
+ case Instruction::Shl:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
+ case Instruction::LShr:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
+ case Instruction::AShr:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
+ default:
+ // TODO: Salvage constants from each kind of binop we know about.
+ return nullptr;
}
- return true;
- } else if (isa<LoadInst>(&I)) {
- MetadataAsValue *AddrMD = wrapMD(I.getOperand(0));
- for (auto *DII : DbgUsers) {
- // Rewrite the load into DW_OP_deref.
- auto *DIExpr = DII->getExpression();
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
- DII->setOperand(0, AddrMD);
- DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- }
- return true;
+ // *Not* to do: we should not attempt to salvage load instructions,
+ // because the validity and lifetime of a dbg.value containing
+  // DW_OP_deref become difficult to analyze. See PR40628 for examples.
}
- return false;
+ return nullptr;
}
/// A replacement for a dbg.value expression.
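A minimal caller sketch for the refactored salvaging API, using only the entry points declared in this hunk; the helper name and its precondition are hypothetical, not part of the patch.

    // Hypothetical helper: rewrite any dbg.value/dbg.declare/dbg.addr users of
    // a dead instruction in terms of its operand, then erase the instruction.
    static void salvageAndErase(Instruction &DeadInst) {
      assert(DeadInst.use_empty() && "only for otherwise-dead instructions");
      // Collects the debug users internally and forwards them to
      // salvageDebugInfoForDbgValues(), which builds one DIExpression per user
      // via salvageDebugInfoImpl().
      salvageDebugInfo(DeadInst);
      DeadInst.eraseFromParent();
    }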
@@ -1849,21 +1881,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return None;
bool Signed = *Signedness == DIBasicType::Signedness::Signed;
-
- if (!Signed) {
- // In the unsigned case, assume that a debugger will initialize the
- // high bits to 0 and do a no-op conversion.
- return Identity(DII);
- } else {
- // In the signed case, the high bits are given by sign extension, i.e:
- // (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1)
- // Calculate the high bits and OR them together with the low bits.
- SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu,
- (ToBits - 1), dwarf::DW_OP_shr,
- dwarf::DW_OP_lit0, dwarf::DW_OP_not,
- dwarf::DW_OP_mul, dwarf::DW_OP_or});
- return DIExpression::appendToStack(DII.getExpression(), Ops);
- }
+ dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
+ SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
+ dwarf::DW_OP_LLVM_convert, FromBits, TK});
+ return DIExpression::appendToStack(DII.getExpression(), Ops);
};
return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
}
@@ -1894,10 +1915,14 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DomTreeUpdater *DTU) {
+ bool PreserveLCSSA, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
BasicBlock *BB = I->getParent();
std::vector <DominatorTree::UpdateType> Updates;
+ if (MSSAU)
+ MSSAU->changeToUnreachable(I);
+
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
if (DTU)
@@ -1928,7 +1953,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
++NumInstrsRemoved;
}
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return NumInstrsRemoved;
}
@@ -1937,8 +1962,8 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
- "", II);
+ CallInst *NewCall = CallInst::Create(
+ II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -1956,7 +1981,7 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -1981,8 +2006,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
// can potentially be avoided with a cleverer API design that we do not have
// as of this time.
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
- InvokeArgs, OpBundles, CI->getName(), BB);
+ InvokeInst *II =
+ InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split,
+ UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -2052,7 +2078,7 @@ static bool markAliveBlocks(Function &F,
Changed = true;
break;
}
- if (CI->doesNotReturn()) {
+ if (CI->doesNotReturn() && !CI->isMustTailCall()) {
// If we found a call to a no-return function, insert an unreachable
// instruction after it. Make sure there isn't *already* one there
// though.
@@ -2102,7 +2128,8 @@ static bool markAliveBlocks(Function &F,
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, BB, UnwindDestBB}});
} else
changeToCall(II, DTU);
Changed = true;
@@ -2191,7 +2218,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDest);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2211,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
@@ -2256,7 +2283,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
}
if (DTU) {
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
bool Deleted = false;
for (auto *BB : DeadBlockSet) {
if (DTU->isBBPendingDeletion(BB))
@@ -2450,12 +2477,12 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
}
-bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
+bool llvm::callsGCLeafFunction(const CallBase *Call,
const TargetLibraryInfo &TLI) {
// Check if the function is specifically marked as a gc leaf function.
- if (CS.hasFnAttr("gc-leaf-function"))
+ if (Call->hasFnAttr("gc-leaf-function"))
return true;
- if (const Function *F = CS.getCalledFunction()) {
+ if (const Function *F = Call->getCalledFunction()) {
if (F->hasFnAttribute("gc-leaf-function"))
return true;
@@ -2469,7 +2496,7 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
// marked as 'gc-leaf-function.' All available Libcalls are
// GC-leaf.
LibFunc LF;
- if (TLI.getLibFunc(CS, LF)) {
+ if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) {
return TLI.has(LF);
}
@@ -2530,13 +2557,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
BasicBlock *BB) {
// Since we are moving the instructions out of its basic block, we do not
// retain their original debug locations (DILocations) and debug intrinsic
- // instructions (dbg.values).
+ // instructions.
//
// Doing so would degrade the debugging experience and adversely affect the
// accuracy of profiling information.
//
// Currently, when hoisting the instructions, we take the following actions:
- // - Remove their dbg.values.
+ // - Remove their debug intrinsic instructions.
// - Set their debug locations to the values from the insertion point.
//
// As per PR39141 (comment #8), the more fundamental reason why the dbg.values
@@ -2554,7 +2581,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
I->dropUnknownNonDebugMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
- if (isa<DbgVariableIntrinsic>(I)) {
+ if (isa<DbgInfoIntrinsic>(I)) {
// Remove DbgInfo Intrinsics.
II = I->eraseFromParent();
continue;
@@ -2613,7 +2640,7 @@ struct BitPart {
/// does not invalidate internal references (std::map instead of DenseMap).
static const Optional<BitPart> &
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
- std::map<Value *, Optional<BitPart>> &BPS) {
+ std::map<Value *, Optional<BitPart>> &BPS, int Depth) {
auto I = BPS.find(V);
if (I != BPS.end())
return I->second;
@@ -2621,13 +2648,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
auto &Result = BPS[V] = None;
auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+  // Prevent stack overflow by limiting the recursion depth.
+ if (Depth == BitPartRecursionMaxDepth) {
+ LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
+ return Result;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
if (I->getOpcode() == Instruction::Or) {
auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!A || !B)
return Result;
@@ -2660,7 +2693,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2692,7 +2725,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2707,7 +2740,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is a zext instruction zero extend the result.
if (I->getOpcode() == Instruction::ZExt) {
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
@@ -2769,7 +2802,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
- auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
+ auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
if (!Res)
return false;
auto &BitProvenance = Res->Provenance;
@@ -2883,3 +2916,41 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
return true;
}
}
+
+using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
+AllocaInst *llvm::findAllocaForValue(Value *V,
+ AllocaForValueMapTy &AllocaForValue) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI;
+  // See if we've already calculated (or started to calculate) the alloca for
+  // the given value.
+ AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
+ if (I != AllocaForValue.end())
+ return I->second;
+ // Store 0 while we're calculating alloca for value V to avoid
+ // infinite recursion if the value references itself.
+ AllocaForValue[V] = nullptr;
+ AllocaInst *Res = nullptr;
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ for (Value *IncValue : PN->incoming_values()) {
+ // Allow self-referencing phi-nodes.
+ if (IncValue == PN)
+ continue;
+ AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
+ // AI for incoming values should exist and should all be equal.
+ if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+ return nullptr;
+ Res = IncValueAI;
+ }
+ } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
+ Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
+ } else {
+ LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
+ << *V << "\n");
+ }
+ if (Res)
+ AllocaForValue[V] = Res;
+ return Res;
+}
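A short usage sketch for the helper added above; Ptr is a placeholder pointer value, everything else follows the signature in the hunk.

    // Resolve the unique alloca, if any, that Ptr is derived from through
    // casts, GEPs and (self-consistent) PHIs. The map is both a memo table and
    // a cycle breaker, so reuse it across queries over the same function.
    DenseMap<Value *, AllocaInst *> AllocaForValue;
    if (AllocaInst *AI = llvm::findAllocaForValue(Ptr, AllocaForValue)) {
      // Every path to Ptr bottoms out in the single alloca AI.
      (void)AI;
    }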
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 41f14a834617..37389a695b45 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -1,9 +1,8 @@
//===----------------- LoopRotationUtils.cpp -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
@@ -28,7 +28,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -296,7 +295,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Begin by walking OrigHeader and populating ValueMap with an entry for
// each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- ValueToValueMapTy ValueMap;
+ ValueToValueMapTy ValueMap, ValueMapMSSA;
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
@@ -375,6 +374,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (auto *II = dyn_cast<IntrinsicInst>(C))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
+    // MemorySSA cares whether the cloned instruction was actually inserted,
+    // not whether it can later be remapped to a simplified value.
+ ValueMapMSSA[Inst] = C;
}
}
@@ -392,10 +394,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
LoopEntryBranch->eraseFromParent();
// Update MemorySSA before the rewrite call below changes the 1:1
- // instruction:cloned_instruction_or_value mapping in ValueMap.
+ // instruction:cloned_instruction_or_value mapping.
if (MSSAU) {
- ValueMap[OrigHeader] = OrigPreheader;
- MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+ ValueMapMSSA[OrigHeader] = OrigPreheader;
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+ ValueMapMSSA);
}
SmallVector<PHINode*, 2> InsertedPHIs;
@@ -463,9 +466,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (BasicBlock *ExitPred : ExitPreds) {
// We only need to split loop exit edges.
Loop *PredLoop = LI->getLoopFor(ExitPred);
- if (!PredLoop || PredLoop->contains(Exit))
- continue;
- if (isa<IndirectBrInst>(ExitPred->getTerminator()))
+ if (!PredLoop || PredLoop->contains(Exit) ||
+ ExitPred->getTerminator()->isIndirectTerminator())
continue;
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 380f4fca54d9..7e6da02d5707 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -1,9 +1,8 @@
//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,9 @@
// to transform the loop and make these guarantees. Client code should check
// that these conditions are true before relying on them.
//
+// Similar complications arise from callbr instructions, particularly in
+// asm-goto, where blockaddress expressions are used.
+//
// Note that the simplifycfg pass will clean up blocks which are split out but
// end up being unnecessary, so usage of this pass should not pessimize
// generated code.
@@ -46,13 +48,15 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -67,6 +71,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -115,7 +120,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
/// preheader insertion and analysis updating.
///
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -124,10 +130,11 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
PI != PE; ++PI) {
BasicBlock *P = *PI;
if (!L->contains(P)) { // Coming in from outside the loop?
- // If the loop is branched to from an indirect branch, we won't
+ // If the loop is branched to from an indirect terminator, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
+ if (P->getTerminator()->isIndirectTerminator())
+ return nullptr;
// Keep track of it.
OutsideBlocks.push_back(P);
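The new predicate generalizes the isa<IndirectBrInst> tests that the surrounding hunks remove; the sketch below spells out what it is assumed to cover, namely that both indirectbr and callbr edges cannot be split.

    // Assumed equivalence for the checks being rewritten in this file:
    const Instruction *T = P->getTerminator();
    bool CannotSplitEdge = isa<IndirectBrInst>(T) || isa<CallBrInst>(T);
    // i.e. T->isIndirectTerminator() is expected to return CannotSplitEdge.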
@@ -137,7 +144,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, nullptr, PreserveLCSSA);
+ LI, MSSAU, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;
@@ -217,7 +224,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, bool PreserveLCSSA,
- AssumptionCache *AC) {
+ AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
@@ -236,8 +243,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) != PN ||
!L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ // We can't split indirect control flow edges.
+ if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
return nullptr;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
@@ -251,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, nullptr, PreserveLCSSA);
+ DT, LI, MSSAU, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -314,7 +321,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
- formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
@@ -339,7 +346,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
/// and have that block branch to the loop header. This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
@@ -358,8 +366,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
BasicBlock *P = *I;
- // Indirectbr edges cannot be split, so we must fail if we find one.
- if (isa<IndirectBrInst>(P->getTerminator()))
+ // Indirect edges cannot be split, so we must fail if we find one.
+ if (P->getTerminator()->isIndirectTerminator())
return nullptr;
if (P != Preheader) BackedgeBlocks.push_back(P);
@@ -439,9 +447,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
if (!LoopMD)
LoopMD = TI->getMetadata(LoopMDKind);
TI->setMetadata(LoopMDKind, nullptr);
- for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
- if (TI->getSuccessor(Op) == Header)
- TI->setSuccessor(Op, BEBlock);
+ TI->replaceSuccessorWith(Header, BEBlock);
}
BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
@@ -454,6 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update dominator information
DT->splitBlock(BEBlock);
+ if (MSSAU)
+ MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+ BEBlock);
+
return BEBlock;
}
@@ -461,8 +471,11 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
ReprocessLoop:
// Check to see that no blocks (other than the header) in this loop have
@@ -489,11 +502,15 @@ ReprocessLoop:
// Zap the dead pred's terminator and replace it with unreachable.
Instruction *TI = P->getTerminator();
- changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+ /*DTU=*/nullptr, MSSAU);
Changed = true;
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// If there are exiting blocks with branches on undef, resolve the undef in
// the direction which will exit the loop. This will help simplify loop
// trip count computations.
@@ -518,7 +535,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
if (Preheader)
Changed = true;
}
@@ -527,9 +544,12 @@ ReprocessLoop:
// predecessors that are inside of the loop. This check guarantees that the
// loop preheader/header will dominate the exit blocks. If the exit block has
// predecessors from outside of the loop, split the edge now.
- if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+ if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
Changed = true;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
BasicBlock *LoopLatch = L->getLoopLatch();
@@ -538,8 +558,8 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (Loop *OuterL =
- separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+ PreserveLCSSA, AC, MSSAU)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -556,11 +576,14 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
if (LoopLatch)
Changed = true;
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
// Scan over the PHI nodes in the loop header. Since they now have only two
@@ -618,9 +641,9 @@ ReprocessLoop:
Instruction *Inst = &*I++;
if (Inst == CI)
continue;
- if (!L->makeLoopInvariant(Inst, AnyInvariant,
- Preheader ? Preheader->getTerminator()
- : nullptr)) {
+ if (!L->makeLoopInvariant(
+ Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
AllInvariant = false;
break;
}
@@ -637,7 +660,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI))
+ if (!FoldBranchToCommonDest(BI, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,
@@ -657,11 +680,16 @@ ReprocessLoop:
DT->changeImmediateDominator(Child, Node->getIDom());
}
DT->eraseNode(ExitingBlock);
+ if (MSSAU) {
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+ ExitBlockSet.insert(ExitingBlock);
+ MSSAU->removeBlocks(ExitBlockSet);
+ }
BI->getSuccessor(0)->removePredecessor(
- ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
BI->getSuccessor(1)->removePredecessor(
- ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
ExitingBlock->eraseFromParent();
}
}
@@ -672,12 +700,15 @@ ReprocessLoop:
if (Changed && SE)
SE->forgetTopmostLoop(L);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
return Changed;
}
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;
#ifndef NDEBUG
@@ -705,7 +736,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
- AC, PreserveLCSSA);
+ AC, MSSAU, PreserveLCSSA);
return Changed;
}
@@ -737,6 +768,9 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ if (EnableMSSALoopDependency)
+ AU.addPreserved<MemorySSAWrapperPass>();
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -768,12 +802,21 @@ bool LoopSimplify::runOnFunction(Function &F) {
ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ MemorySSA *MSSA = nullptr;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAAnalysis) {
+ MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ }
+ }
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
#ifndef NDEBUG
if (PreserveLCSSA) {
@@ -794,9 +837,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
- // after simplifying the loops.
+ // after simplifying the loops. MemorySSA is not preserved either.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
+ Changed |=
+ simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
@@ -809,6 +853,12 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<DependenceAnalysis>();
+  // BPI maps conditional terminators to probabilities. LoopSimplify can insert
+ // blocks, but it does so only by splitting existing blocks and edges. This
+ // results in the interesting property that all new terminators inserted are
+ // unconditional branches which do not appear in BPI. All deletions are
+  // handled via ValueHandle callbacks within BPI.
+ PA.preserve<BranchProbabilityAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index da7ed2bd1652..e39ade523714 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1,9 +1,8 @@
//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,8 @@ using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
+ "conditional latch (completely or otherwise)");
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
@@ -94,66 +95,6 @@ void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
}
}
-/// Folds a basic block into its predecessor if it only has one predecessor, and
-/// that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent.
-BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
- ScalarEvolution *SE,
- DominatorTree *DT) {
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- BasicBlock *OnlyPred = BB->getSinglePredecessor();
- if (!OnlyPred) return nullptr;
-
- if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
- return nullptr;
-
- LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
- << OnlyPred->getName() << "\n");
-
- // Resolve any PHI nodes at the start of the block. They are all
- // guaranteed to have exactly one entry if they exist, unless there are
- // multiple duplicate (but guaranteed to be equal) entries for the
- // incoming edges. This occurs when there are multiple edges from
- // OnlyPred to OnlySucc.
- FoldSingleEntryPHINodes(BB);
-
- // Delete the unconditional branch from the predecessor...
- OnlyPred->getInstList().pop_back();
-
- // Make all PHI nodes that referred to BB now refer to Pred as their
- // source...
- BB->replaceAllUsesWith(OnlyPred);
-
- // Move all definitions in the successor to the predecessor...
- OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
- // OldName will be valid until erased.
- StringRef OldName = BB->getName();
-
- // Erase the old block and update dominator info.
- if (DT)
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(OnlyPred);
- SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (auto *DI : Children)
- DT->changeImmediateDominator(DI, PredDTN);
-
- DT->eraseNode(BB);
- }
-
- LI->removeBlock(BB);
-
- // Inherit predecessor's name if it exists...
- if (!OldName.empty() && !OnlyPred->hasName())
- OnlyPred->setName(OldName);
-
- BB->eraseFromParent();
-
- return OnlyPred;
-}
-
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
/// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -332,12 +273,11 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// If RemainderLoop is non-null, it will receive the remainder loop (if
/// required and not fully unrolled).
-LoopUnrollResult llvm::UnrollLoop(
- Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
- bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
+LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA, Loop **RemainderLoop) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
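A hedged caller sketch for the struct-based signature introduced here; the field names are taken from the ULO.* uses later in this function (plus PreserveCondBr/PreserveOnlyFirst, assumed to keep their old parameter names), and the initialization style is illustrative only.

    UnrollLoopOptions ULO;
    ULO.Count = 4;                      // unroll factor
    ULO.TripCount = TripCount;          // 0 if unknown
    ULO.TripMultiple = TripMultiple;
    ULO.Force = false;
    ULO.AllowRuntime = true;
    ULO.AllowExpensiveTripCount = false;
    ULO.PreserveCondBr = false;         // assumed field name
    ULO.PreserveOnlyFirst = false;      // assumed field name
    ULO.PeelCount = 0;
    ULO.UnrollRemainder = false;
    ULO.ForgetAllSCEV = false;
    Loop *RemainderLoop = nullptr;
    LoopUnrollResult Result =
        UnrollLoop(L, ULO, LI, SE, DT, AC, ORE, PreserveLCSSA, &RemainderLoop);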
@@ -357,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can only unroll loops with a single latch
+ // The current loop unroll pass can unroll loops with a single latch or header
// that's a conditional branch exiting the loop.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
+ BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- if (!BI || BI->isUnconditional()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
+  // FIXME: Support loops without a conditional latch and with multiple
+  // exiting blocks.
+ if (!BI ||
+ (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
+ L->getExitingBlock() != Header))) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
+ "branch in the latch or header.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->isConditional() && BI->getSuccessor(S1) == Header &&
+ !L->contains(BI->getSuccessor(S2));
+ };
+
+ // If we have a conditional latch, it must exit the loop.
+ if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
+ !CheckLatchSuccessors(1, 0)) {
LLVM_DEBUG(
- dbgs()
- << " Can't unroll; loop not terminated by a conditional branch.\n");
+        dbgs() << "Can't unroll; a conditional latch must exit the loop\n");
return LoopUnrollResult::Unmodified;
}
- auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
- return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+ auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
+ return HeaderBI && HeaderBI->isConditional() &&
+ L->contains(HeaderBI->getSuccessor(S1)) &&
+ !L->contains(HeaderBI->getSuccessor(S2));
};
- if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
- LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
- " exiting the loop can be unrolled\n");
+ // If we do not have a conditional latch, the header must exit the loop.
+ if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
+ !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
+    LLVM_DEBUG(
+        dbgs() << "Can't unroll; conditional header must exit the loop\n");
return LoopUnrollResult::Unmodified;
}
@@ -389,28 +347,28 @@ LoopUnrollResult llvm::UnrollLoop(
return LoopUnrollResult::Unmodified;
}
- if (TripCount != 0)
- LLVM_DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
- if (TripMultiple != 1)
- LLVM_DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+ if (ULO.TripCount != 0)
+ LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
+ if (ULO.TripMultiple != 1)
+ LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
- if (TripCount != 0 && Count > TripCount)
- Count = TripCount;
+ if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
+ ULO.Count = ULO.TripCount;
// Don't enter the unroll code if there is nothing to do.
- if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+ if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
- assert(Count > 0);
- assert(TripMultiple > 0);
- assert(TripCount == 0 || TripCount % TripMultiple == 0);
+ assert(ULO.Count > 0);
+ assert(ULO.TripMultiple > 0);
+ assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
// Are we eliminating the loop control altogether?
- bool CompletelyUnroll = Count == TripCount;
+ bool CompletelyUnroll = ULO.Count == ULO.TripCount;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
@@ -429,24 +387,29 @@ LoopUnrollResult llvm::UnrollLoop(
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
- bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+ bool RuntimeTripCount =
+ (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
- assert((!RuntimeTripCount || !PeelCount) &&
+ assert((!RuntimeTripCount || !ULO.PeelCount) &&
"Did not expect runtime trip-count unrolling "
"and peeling for the same loop");
bool Peeled = false;
- if (PeelCount) {
- Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+ if (ULO.PeelCount) {
+ Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Successful peeling may result in a change in the loop preheader/trip
// counts. If we later unroll the loop, we want these to be updated.
if (Peeled) {
- BasicBlock *ExitingBlock = L->getExitingBlock();
+      // According to our guards and profitability checks, the only
+      // meaningful exit should be the latch block. Other exits go to deopt,
+ // so we do not worry about them.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
assert(ExitingBlock && "Loop without exiting block?");
+ assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
Preheader = L->getLoopPreheader();
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
}
@@ -459,7 +422,7 @@ LoopUnrollResult llvm::UnrollLoop(
for (auto &I : *BB)
if (auto CS = CallSite(&I))
HasConvergent |= CS.isConvergent();
- assert((!HasConvergent || TripMultiple % Count == 0) &&
+ assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
"Unroll count must divide trip multiple if loop contains a "
"convergent operation.");
});
@@ -468,11 +431,12 @@ LoopUnrollResult llvm::UnrollLoop(
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
: isEpilogProfitable(L);
- if (RuntimeTripCount && TripMultiple % Count != 0 &&
- !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- EpilogProfitability, UnrollRemainder, LI, SE,
- DT, AC, PreserveLCSSA, RemainderLoop)) {
- if (Force)
+ if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+ !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
+ EpilogProfitability, ULO.UnrollRemainder,
+ ULO.ForgetAllSCEV, LI, SE, DT, AC,
+ PreserveLCSSA, RemainderLoop)) {
+ if (ULO.Force)
RuntimeTripCount = false;
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
@@ -483,35 +447,35 @@ LoopUnrollResult llvm::UnrollLoop(
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
- if (TripCount != 0) {
- BreakoutTrip = TripCount % Count;
- TripMultiple = 0;
+ if (ULO.TripCount != 0) {
+ BreakoutTrip = ULO.TripCount % ULO.Count;
+ ULO.TripMultiple = 0;
} else {
// Figure out what multiple to use.
- BreakoutTrip = TripMultiple =
- (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ BreakoutTrip = ULO.TripMultiple =
+ (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
}
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n");
+ << " with trip count " << ULO.TripCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
L->getHeader())
<< "completely unrolled loop with "
- << NV("UnrollCount", TripCount) << " iterations";
+ << NV("UnrollCount", ULO.TripCount) << " iterations";
});
- } else if (PeelCount) {
+ } else if (ULO.PeelCount) {
LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << PeelCount << "!\n");
+ << " with iteration count " << ULO.PeelCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
L->getHeader())
- << " peeled loop by " << NV("PeelCount", PeelCount)
+ << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
<< " iterations";
});
} else {
@@ -519,24 +483,25 @@ LoopUnrollResult llvm::UnrollLoop(
OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
L->getHeader());
return Diag << "unrolled loop by a factor of "
- << NV("UnrollCount", Count);
+ << NV("UnrollCount", ULO.Count);
};
LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
- << Count);
- if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ << ULO.Count);
+ if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
if (ORE)
ORE->emit([&]() {
return DiagBuilder() << " with a breakout at trip "
<< NV("BreakoutTrip", BreakoutTrip);
});
- } else if (TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (ULO.TripMultiple != 1) {
+ LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
if (ORE)
ORE->emit([&]() {
- return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
- << " trips per branch";
+ return DiagBuilder()
+ << " with " << NV("TripMultiple", ULO.TripMultiple)
+ << " trips per branch";
});
} else if (RuntimeTripCount) {
LLVM_DEBUG(dbgs() << " with run-time trip count");
@@ -555,11 +520,24 @@ LoopUnrollResult llvm::UnrollLoop(
// and if something changes inside them then any of outer loops may also
// change. When we forget outermost loop, we also forget all contained loops
// and this is what we need here.
- if (SE)
- SE->forgetTopmostLoop(L);
+ if (SE) {
+ if (ULO.ForgetAllSCEV)
+ SE->forgetAllLoops();
+ else
+ SE->forgetTopmostLoop(L);
+ }
- bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
- BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+ bool ContinueOnTrue;
+ bool LatchIsExiting = BI->isConditional();
+ BasicBlock *LoopExit = nullptr;
+ if (LatchIsExiting) {
+ ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ LoopExit = BI->getSuccessor(ContinueOnTrue);
+ } else {
+ NumUnrolledWithHeader++;
+ ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
+ LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
+ }
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
@@ -569,11 +547,23 @@ LoopUnrollResult llvm::UnrollLoop(
OrigPHINode.push_back(cast<PHINode>(I));
}
- std::vector<BasicBlock*> Headers;
- std::vector<BasicBlock*> Latches;
+ std::vector<BasicBlock *> Headers;
+ std::vector<BasicBlock *> HeaderSucc;
+ std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
+ if (!LatchIsExiting) {
+ auto *Term = cast<BranchInst>(Header->getTerminator());
+ if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
+ assert(L->contains(Term->getSuccessor(0)));
+ HeaderSucc.push_back(Term->getSuccessor(0));
+ } else {
+ assert(L->contains(Term->getSuccessor(1)));
+ HeaderSucc.push_back(Term->getSuccessor(1));
+ }
+ }
+
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
// exit.
@@ -599,7 +589,7 @@ LoopUnrollResult llvm::UnrollLoop(
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
if (NewDIL)
I.setDebugLoc(NewDIL.getValue());
else
@@ -608,7 +598,7 @@ LoopUnrollResult llvm::UnrollLoop(
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
- for (unsigned It = 1; It != Count; ++It) {
+ for (unsigned It = 1; It != ULO.Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
NewLoops[L] = L;
@@ -663,6 +653,13 @@ LoopUnrollResult llvm::UnrollLoop(
if (*BB == LatchBlock)
Latches.push_back(New);
+ // Keep track of the successor of the new header in the current iteration.
+ for (auto *Pred : predecessors(*BB))
+ if (Pred == Header) {
+ HeaderSucc.push_back(New);
+ break;
+ }
+
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
@@ -699,8 +696,7 @@ LoopUnrollResult llvm::UnrollLoop(
if (CompletelyUnroll) {
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
- }
- else if (Count > 1) {
+ } else if (ULO.Count > 1) {
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
// If this value was defined in the loop, take the value defined by the
// last iteration of the loop.
@@ -713,39 +709,11 @@ LoopUnrollResult llvm::UnrollLoop(
}
}
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0) {
- NeedConditional = false;
- }
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to keep
- // the conditional branch except the last one because the loop may exit
- // after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
-
+ auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
+ ArrayRef<BasicBlock *> NextBlocks,
+ BasicBlock *CurrentHeader,
+ bool NeedConditional) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
@@ -753,9 +721,9 @@ LoopUnrollResult llvm::UnrollLoop(
} else {
// Remove phi operands at this loop exit
if (Dest != LoopExit) {
- BasicBlock *BB = Latches[i];
- for (BasicBlock *Succ: successors(BB)) {
- if (Succ == Headers[i])
+ BasicBlock *BB = Src;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == CurrentHeader)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
@@ -765,13 +733,97 @@ LoopUnrollResult llvm::UnrollLoop(
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
}
+ };
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ if (LatchIsExiting) {
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
+ // If using trip count upper bound to completely unroll, we need to keep
+ // the conditional branch except the last one because the loop may exit
+ // after any iteration.
+ assert(NeedConditional &&
+ "NeedCondition cannot be modified by both complete "
+ "unrolling and runtime unrolling");
+ NeedConditional =
+ (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
+ } else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+ }
+
+ setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+ } else {
+ // Setup headers to branch to their new successors in the unrolled
+ // iterations.
+ for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = HeaderSucc[i];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0)
+ NeedConditional = false;
+
+ if (CompletelyUnroll)
+ // We cannot drop the conditional branch for the last condition, as we
+ // may have to execute the loop body depending on the condition.
+ NeedConditional = j == 0 || ULO.PreserveCondBr;
+ else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+
+ setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (CompletelyUnroll && j == 0)
+ new UnreachableInst(Term->getContext(), Term);
+ else
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+
+ Term->eraseFromParent();
+ }
}
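The latch-exiting wiring above boils down to one predicate per unrolled iteration. Below is a minimal standalone sketch of that decision, assuming iteration i branches to iteration j = (i + 1) % Count; the struct and function names are illustrative rather than LLVM API.

// Minimal sketch: when must the copied terminator stay conditional?
struct UnrollWiringParams {
  bool RuntimeTripCount;  // an epilog remainder loop will handle leftovers
  bool CompletelyUnroll;
  bool PreserveCondBr;    // mirrors ULO.PreserveCondBr
  bool PreserveOnlyFirst; // mirrors ULO.PreserveOnlyFirst
  unsigned TripMultiple;  // mirrors ULO.TripMultiple
  unsigned BreakoutTrip;
};

// i is the iteration being wired, j = (i + 1) % Count is its branch target.
static bool needConditionalBranch(const UnrollWiringParams &P, unsigned i,
                                  unsigned j) {
  if (P.RuntimeTripCount && j != 0)
    return false; // the epilog covers the remaining iterations
  if (P.CompletelyUnroll)
    // Keep the guard only where the caller asked for it (upper-bound unroll).
    return P.PreserveCondBr && j && !(P.PreserveOnlyFirst && i != 0);
  if (j != P.BreakoutTrip && (P.TripMultiple == 0 || j % P.TripMultiple != 0))
    return false; // a known trip multiple makes this test redundant
  return true;
}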
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
// iterations too.
- if (DT && Count > 1) {
+ if (DT && ULO.Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
@@ -781,7 +833,9 @@ LoopUnrollResult llvm::UnrollLoop(
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
- if (BB == LatchBlock) {
+ BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
+ auto &TermBlocks = LatchIsExiting ? Latches : Headers;
+ if (BB == TermBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
@@ -789,11 +843,13 @@ LoopUnrollResult llvm::UnrollLoop(
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
- NewIDom = Latches.back();
- for (BasicBlock *IterLatch : Latches) {
- Instruction *Term = IterLatch->getTerminator();
+      // For loops exiting from the header, only loops with a single exiting
+      // block are supported.
+ NewIDom = TermBlocks.back();
+ for (BasicBlock *Iter : TermBlocks) {
+ Instruction *Term = Iter->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom = IterLatch;
+ NewIDom = Iter;
break;
}
}
@@ -810,14 +866,20 @@ LoopUnrollResult llvm::UnrollLoop(
}
assert(!DT || !UnrollVerifyDomtree ||
- DT->verify(DominatorTree::VerificationLevel::Fast));
+ DT->verify(DominatorTree::VerificationLevel::Fast));
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
- BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
- if (Term->isUnconditional()) {
+ BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert((Term ||
+ (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
+ "Need a branch as terminator, except when fully unrolling with "
+ "unconditional latch");
+ if (Term && Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
@@ -829,8 +891,8 @@ LoopUnrollResult llvm::UnrollLoop(
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE,
- DT, AC);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
+ SE, DT, AC);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -878,11 +940,11 @@ LoopUnrollResult llvm::UnrollLoop(
// TODO: That potentially might be compile-time expensive. We should try
// to fix the loop-simplified form incrementally.
- simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
+ simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
} else {
// Simplify loops for which we might've broken loop-simplify form.
for (Loop *SubLoop : LoopsToSimplify)
- simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
+ simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
}
}
diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index e26762639c13..ff49d83f25c5 100644
--- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -1,9 +1,8 @@
//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -198,8 +197,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
if (TripMultiple == 1 || TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
- UnrollRemainder, LI, SE, DT, AC, true,
- EpilogueLoop)) {
+ UnrollRemainder, /*ForgetAllSCEV*/ false,
+ LI, SE, DT, AC, true, EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -301,7 +300,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
if (NewDIL)
I.setDebugLoc(NewDIL.getValue());
else
@@ -539,12 +538,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
while (!MergeBlocks.empty()) {
BasicBlock *BB = *MergeBlocks.begin();
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Don't remove BB and add Fold as they are the same BB
assert(Fold == BB);
(void)Fold;
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 151a285af4e9..005306cf1898 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,9 +1,8 @@
//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+ "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow peeling of loops with multiple deopt exits."));
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
@@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
+ if (UnrollPeelMultiDeoptExit) {
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+
+ if (!Exits.empty()) {
+ // Latch's terminator is a conditional branch, Latch is exiting and
+ // all non Latch exits ends up with deoptimize.
+ const BasicBlock *Latch = L->getLoopLatch();
+ const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+ return T && T->isConditional() && L->isLoopExiting(Latch) &&
+ all_of(Exits, [](const BasicBlock *BB) {
+ return BB->getTerminatingDeoptimizeCall();
+ });
+ }
+ }
+
// Only peel loops that contain a single exit
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
@@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
unsigned IterNumber, unsigned AvgIters,
uint64_t &PeeledHeaderWeight) {
+ if (!PeeledHeaderWeight)
+ return;
// FIXME: Pick a more realistic distribution.
// Currently the proportion of weight we assign to the fall-through
// side of the branch drops linearly with the iteration number, and we use
// a 0.9 fudge factor to make the drop-off less sharp...
- if (PeeledHeaderWeight) {
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
-
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ uint64_t FallThruWeight =
+ PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+ uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+ PeeledHeaderWeight -= ExitWeight;
+
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+ : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Initialize the weights.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
+/// \param[out] CurHeaderWeight The # of times the header is executed.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ unsigned AvgIters, uint64_t &ExitWeight,
+ uint64_t &CurHeaderWeight) {
+ uint64_t TrueWeight, FalseWeight;
+ if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+  // The # of times the loop body executes is the sum of the # of times the
+  // exit block is taken and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
+}
+
+/// Update the weights of original Latch block after peeling off all iterations.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit block.
+/// \param CurHeaderWeight The # of times the header is executed.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ uint64_t ExitWeight, uint64_t CurHeaderWeight) {
+ // Adjust the branch weights on the loop exit.
+ if (!ExitWeight)
+ return;
+
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
+ MDBuilder MDB(LatchBR->getContext());
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
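To see the drop-off concretely, here is a minimal standalone program that replays the arithmetic from updateBranchWeights for the first few peeled iterations, assuming an initial peeled header weight of 1000 and AvgIters of 10; both numbers are invented for illustration.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t PeeledHeaderWeight = 1000; // assumed profile weight of the header
  const unsigned AvgIters = 10;       // assumed expected trip count
  for (unsigned IterNumber = 0; IterNumber < 3; ++IterNumber) {
    // The fall-through share drops linearly with the iteration number and is
    // damped by the 0.9 fudge factor, exactly as in updateBranchWeights.
    uint64_t FallThruWeight =
        PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
    uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
    PeeledHeaderWeight -= ExitWeight; // weight left for later iterations
    std::printf("iter %u: fallthru=%llu exit=%llu\n", IterNumber,
                (unsigned long long)FallThruWeight,
                (unsigned long long)ExitWeight);
  }
  return 0;
}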
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
-/// \param Exit The exit block of the original loop.
+/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Exit,
- SmallVectorImpl<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
+static void cloneLoopBlocks(
+ Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+ SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// iteration (for every other iteration)
BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- LatchBR->setSuccessor(HeaderIdx, InsertBot);
- LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
+ if (LatchBR->getSuccessor(idx) == Header) {
+ LatchBR->setSuccessor(idx, InsertBot);
+ break;
+ }
if (DT)
DT->changeImmediateDominator(InsertBot, NewLatch);
@@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// we've just created. Note that this must happen *after* the incoming
// values are adjusted, since the value going out of the latch may also be
// a value coming into the header.
- for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
- PHINode *PHI = cast<PHINode>(I);
- Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
- Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
- if (LatchInst && L->contains(LatchInst))
- LatchVal = VMap[LatchVal];
- PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
- }
+ for (auto Edge : ExitEdges)
+ for (PHINode &PHI : Edge.second->phis()) {
+ Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+ }
// LastValueMap is updated with the values for the current loop
// which are used the next time this function is called.
@@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Exit = L->getUniqueExitBlock();
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+ L->getExitEdges(ExitEdges);
+
+ DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+ if (DT) {
+ assert(L->hasDedicatedExits() && "No dedicated exits?");
+ for (auto Edge : ExitEdges) {
+ if (ExitIDom.count(Edge.second))
+ continue;
+ BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+ assert(L->contains(BB) && "IDom is not in a loop");
+ ExitIDom[Edge.second] = BB;
+ }
+ }
Function *F = Header->getParent();
@@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// newly created branches.
BranchInst *LatchBR =
cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-
- uint64_t TrueWeight, FalseWeight;
uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
- ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // weight and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
- }
+ initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
@@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
else
CurHeaderWeight = 1;
- cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
- NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, DT, LI);
// Remap to use values from the current iteration instead of the
// previous one.
@@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// latter is the first cloned loop body, as original PreHeader dominates
// the original loop body.
if (Iter == 0)
- DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+ for (auto Exit : ExitIDom)
+ DT->changeImmediateDominator(Exit.first,
+ cast<BasicBlock>(LVMap[Exit.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
@@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
if (LatchInst && L->contains(LatchInst))
NewVal = LVMap[LatchInst];
- PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+ PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- // Adjust the branch weights on the loop exit.
- if (ExitWeight) {
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
- // If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop()) {
- SE->forgetLoop(ParentLoop);
+ if (Loop *ParentLoop = L->getParentLoop())
+ L = ParentLoop;
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
- } else {
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
- }
+ // We modified the loop, update SE.
+ SE->forgetTopmostLoop(L);
+
+ // Finally DomtTree must be correct.
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 00d2fd2fdbac..d22fdb4d52dc 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1,9 +1,8 @@
//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,11 +124,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
- if (L->contains(&PN)) {
- PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
- } else {
+ if (L->contains(&PN))
+ PN.setIncomingValueForBlock(NewPreHeader, NewPN);
+ else
PN.addIncoming(NewPN, PrologExit);
- }
}
}
@@ -265,7 +263,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Update the existing PHI node operand with the value from the new PHI
// node. Corresponding instruction in epilog loop should be PHI.
PHINode *VPN = cast<PHINode>(VMap[&PN]);
- VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
}
}
@@ -426,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
/// is populated with all the loop exit blocks other than the LatchExit block.
-static bool
-canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
- BasicBlock *LatchExit, bool PreserveLCSSA,
- bool UseEpilogRemainder) {
+static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
+ bool PreserveLCSSA,
+ bool UseEpilogRemainder) {
// We currently have some correctness constraints in unrolling a multi-exit
// loop. Check for these below.
@@ -437,11 +434,6 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
// We rely on LCSSA form being preserved when the exit blocks are transformed.
if (!PreserveLCSSA)
return false;
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueExitBlocks(Exits);
- for (auto *BB : Exits)
- if (BB != LatchExit)
- OtherExits.push_back(BB);
// TODO: Support multiple exiting blocks jumping to the `LatchExit` when
// UnrollRuntimeMultiExit is true. This will need updating the logic in
@@ -471,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop(
bool PreserveLCSSA, bool UseEpilogRemainder) {
#if !defined(NDEBUG)
- SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
- assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
- PreserveLCSSA, UseEpilogRemainder) &&
+ assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
+ UseEpilogRemainder) &&
"Should be safe to unroll before checking profitability!");
#endif
@@ -554,10 +545,10 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
- bool UnrollRemainder, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, bool PreserveLCSSA,
- Loop **ResultLoop) {
+ bool UnrollRemainder, bool ForgetAllSCEV,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA, Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -597,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
+ L->getUniqueNonLatchExitBlocks(OtherExits);
bool isMultiExitUnrollingEnabled =
- canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+ canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
UseEpilogRemainder) &&
canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
UseEpilogRemainder);
@@ -939,23 +931,24 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (OtherExits.size() > 0) {
// Generate dedicated exit blocks for the original loop, to preserve
// LoopSimplifyForm.
- formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
// Generate dedicated exit blocks for the remainder loop if one exists, to
// preserve LoopSimplifyForm.
if (remainderLoop)
- formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
}
auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
UnrollResult =
- UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
- /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollLoop(remainderLoop,
+ {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
+ LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index a93d1aeb62ef..ec226e65f650 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1,9 +1,8 @@
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,10 +14,12 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -27,7 +28,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -47,6 +47,7 @@ using namespace llvm::PatternMatch;
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
bool Changed = false;
@@ -66,6 +67,9 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
// We cannot rewrite exiting edges from an indirectbr.
return false;
+ if (isa<CallBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from a callbr.
+ return false;
InLoopPredecessors.push_back(PredBB);
} else {
@@ -79,7 +83,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return false;
auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
+ BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
if (!NewExitBB)
LLVM_DEBUG(
@@ -217,7 +221,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
// When the value is absent it is interpreted as 'attribute set'.
return true;
case 2:
- return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+ if (ConstantInt *IntMD =
+ mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+ return IntMD->getZExtValue();
+ return true;
}
llvm_unreachable("unexpected number of options");
}
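The case-2 path above now reads the boolean out of the ConstantInt instead of relying on the pointer-to-bool conversion. A minimal sketch of the same extraction on a hand-built node, assuming the usual two-operand encoding of boolean loop attributes:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Hand-built equivalent of !{!"llvm.loop.vectorize.enable", i1 false}.
  Metadata *Ops[] = {
      MDString::get(Ctx, "llvm.loop.vectorize.enable"),
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Ctx), 0))};
  MDNode *MD = MDNode::get(Ctx, Ops);

  bool Value = true; // an absent value is interpreted as "attribute set"
  if (ConstantInt *IntMD =
          mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
    Value = IntMD->getZExtValue(); // read the value, not just its presence
  outs() << (Value ? "enabled" : "disabled") << "\n";
  return 0;
}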
@@ -376,17 +383,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
- if (Enable == true) {
- // 'Forcing' vector width and interleave count to one effectively disables
- // this tranformation.
- if (VectorizeWidth == 1 && InterleaveCount == 1)
- return TM_SuppressedByUser;
- return TM_ForcedByUser;
- }
+ // 'Forcing' vector width and interleave count to one effectively disables
+  // this transformation.
+ if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
return TM_Disable;
+ if (Enable == true)
+ return TM_ForcedByUser;
+
if (VectorizeWidth == 1 && InterleaveCount == 1)
return TM_Disable;
@@ -528,10 +535,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
// Update the dominator tree by informing it about the new edge from the
- // preheader to the exit.
- DTU.insertEdge(Preheader, ExitBlock);
- // Inform the dominator tree about the removed edge.
- DTU.deleteEdge(Preheader, L->getHeader());
+ // preheader to the exit and the removed edge.
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
+ {DominatorTree::Delete, Preheader, L->getHeader()}});
}
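The hunk above folds the separate insertEdge and deleteEdge calls into one applyUpdates batch. A minimal self-contained sketch of that pattern on an invented three-block function:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  Function *F =
      Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                       Function::ExternalLinkage, "f", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Hdr = BasicBlock::Create(Ctx, "header", F);
  BasicBlock *Exit = BasicBlock::Create(Ctx, "exit", F);
  IRBuilder<> B(Entry);
  B.CreateBr(Hdr);
  B.SetInsertPoint(Hdr);
  B.CreateBr(Exit);
  B.SetInsertPoint(Exit);
  B.CreateRetVoid();

  DominatorTree DT(*F);
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

  // Rewire entry to branch straight to exit, then report both CFG edge
  // changes to the updater in a single batch.
  Entry->getTerminator()->eraseFromParent();
  B.SetInsertPoint(Entry);
  B.CreateBr(Exit);
  DTU.applyUpdates({{DominatorTree::Insert, Entry, Exit},
                    {DominatorTree::Delete, Entry, Hdr}});
  return DT.verify() ? 0 : 1;
}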
// Use a map to unique and a vector to guarantee deterministic ordering.
@@ -578,10 +584,14 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// dbg.value truncates the range of any dbg.value before the loop where the
// loop used to be. This is particularly important for constant values.
DIBuilder DIB(*ExitBlock->getModule());
+ Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+ assert(InsertDbgValueBefore &&
+ "There should be a non-PHI instruction in exit block, else these "
+ "instructions will have no parent.");
for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(
- UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+ DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+ DVI->getVariable(), DVI->getExpression(),
+ DVI->getDebugLoc(), InsertDbgValueBefore);
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
@@ -611,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
}
Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
- // Only support loops with a unique exiting block, and a latch.
- if (!L->getExitingBlock())
- return None;
+  // Support loops with an exiting latch; any other exits must end in a
+  // deoptimize call.
// Get the branch weights for the loop's backedge.
- BranchInst *LatchBR =
- dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
- if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return None;
+ BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
return None;
assert((LatchBR->getSuccessor(0) == L->getHeader() ||
LatchBR->getSuccessor(1) == L->getHeader()) &&
"At least one edge out of the latch must go to the header");
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getUniqueNonLatchExitBlocks(ExitBlocks);
+ if (any_of(ExitBlocks, [](const BasicBlock *EB) {
+ return !EB->getTerminatingDeoptimizeCall();
+ }))
+ return None;
+
// To estimate the number of times the loop body was executed, we want to
// know the number of times the backedge was taken, vs. the number of times
// we exited the loop.
@@ -665,16 +683,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-/// Adds a 'fast' flag to floating point operations.
-static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)) {
- FastMathFlags Flags;
- Flags.setFast();
- cast<Instruction>(V)->setFastMathFlags(Flags);
- }
- return V;
-}
-
Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
RecurrenceDescriptor::MinMaxRecurrenceKind RK,
Value *Left, Value *Right) {
@@ -778,9 +786,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
ConstantVector::get(ShuffleMask), "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
- // Floating point operations had to be 'fast' to enable the reduction.
- TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
- TmpVec, Shuf, "bin.rdx"));
+ // The builder propagates its fast-math-flags setting.
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
@@ -801,13 +809,9 @@ Value *llvm::createSimpleTargetReduction(
ArrayRef<Value *> RedOps) {
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
- Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
- // TODO: Support creating ordered reductions.
- FastMathFlags FMFFast;
- FMFFast.setFast();
switch (Opcode) {
case Instruction::Add:
@@ -827,15 +831,15 @@ Value *llvm::createSimpleTargetReduction(
break;
case Instruction::FAdd:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ auto Rdx = Builder.CreateFAddReduce(
+ Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
return Rdx;
};
break;
case Instruction::FMul:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ Type *Ty = Src->getType()->getVectorElementType();
+ auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
return Rdx;
};
break;
@@ -880,6 +884,12 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
TargetTransformInfo::ReductionFlags Flags;
Flags.NoNaN = NoNaN;
+
+ // All ops in the reduction inherit fast-math-flags from the recurrence
+ // descriptor.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+ B.setFastMathFlags(Desc.getFastMathFlags());
+
switch (RecKind) {
case RD::RK_FloatAdd:
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
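The FastMathFlagGuard added above scopes the builder's fast-math flags to the reduction code being emitted. A minimal self-contained sketch of the same RAII pattern, with an invented function and value names:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  Type *FloatTy = Type::getFloatTy(Ctx);
  Function *F = Function::Create(
      FunctionType::get(FloatTy, {FloatTy, FloatTy}, false),
      Function::ExternalLinkage, "f", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  auto AI = F->arg_begin();
  Value *X = &*AI++;
  Value *Y = &*AI;

  Value *Plain = B.CreateFAdd(X, Y, "plain"); // created without flags
  {
    IRBuilder<>::FastMathFlagGuard Guard(B);
    FastMathFlags FMF;
    FMF.setFast();
    B.setFastMathFlags(FMF);
    B.CreateFAdd(Plain, Y, "relaxed"); // inherits 'fast' from the builder
  }
  // The guard restored the previous (empty) flags on scope exit.
  B.CreateRet(B.CreateFAdd(Plain, X, "plain2"));
  M.print(outs(), nullptr);
  return 0;
}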
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index abbcd5f9e3b8..a9a480a4b7f9 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -1,9 +1,8 @@
//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -281,8 +280,9 @@ public:
bool Changed = false;
for (Loop *L : Worklist) {
const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
+ (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
LoopVersioning LVer(LAI, L, LI, DT, SE);
LVer.versionLoop();
LVer.annotateLoopWithNoAlias();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index c852d538b0d1..fe67e191dc62 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -1,9 +1,8 @@
//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,7 +52,8 @@ static bool runImpl(Function &F) {
II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
CallInst *NewCall =
- CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II);
+ CallInst::Create(II->getFunctionType(), II->getCalledValue(),
+ CallArgs, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 661b4fa5bcb7..0cc085dc366c 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -1,9 +1,8 @@
//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -73,7 +72,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Loop Body
Value *SrcGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
Value *DstGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -115,7 +114,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
// Cast destination to operand type and store.
PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
@@ -182,7 +181,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -235,7 +234,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
Value *SrcGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
Value *DstGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -293,6 +292,8 @@ static void createMemMoveLoop(Instruction *InsertBefore,
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
+ Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+
// Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
// part (if src < dst).
@@ -331,9 +332,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
Value *IndexPtr = LoopBuilder.CreateSub(
LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
Value *Element = LoopBuilder.CreateLoad(
- LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
+ "element");
+ LoopBuilder.CreateStore(
+ Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
LoopBuilder.CreateCondBr(
LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
ExitBB, LoopBB);
@@ -348,9 +350,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
IRBuilder<> FwdLoopBuilder(FwdLoopBB);
PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
Value *FwdElement = FwdLoopBuilder.CreateLoad(
- FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
+ "element");
FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
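The changes in this file pass the element type to CreateLoad and CreateInBoundsGEP explicitly instead of letting the builder inspect the pointee type. A minimal self-contained sketch of the typed calls, copying a single element through an invented helper function:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *I64 = Type::getInt64Ty(Ctx);
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx),
                        {I32->getPointerTo(), I32->getPointerTo(), I64}, false),
      Function::ExternalLinkage, "copy_one", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  auto AI = F->arg_begin();
  Value *Dst = &*AI++;
  Value *Src = &*AI++;
  Value *Index = &*AI;

  // The element type is passed explicitly; the builder no longer derives it
  // from the pointee type of Src or Dst.
  Value *SrcGEP = B.CreateInBoundsGEP(I32, Src, Index);
  Value *Element = B.CreateLoad(I32, SrcGEP, "element");
  B.CreateStore(Element, B.CreateInBoundsGEP(I32, Dst, Index));
  B.CreateRetVoid();
  M.print(outs(), nullptr);
  return 0;
}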
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index d019a44fc705..8256e3b5f5af 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -1,9 +1,8 @@
//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,8 +16,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -28,6 +31,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -58,9 +62,8 @@ static bool IsInRanges(const IntRange &R,
// Find the first range whose High field is >= R.High,
// then check if the Low field is <= R.Low. If so, we
// have a Range that covers R.
- auto I = std::lower_bound(
- Ranges.begin(), Ranges.end(), R,
- [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ auto I = llvm::lower_bound(
+ Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; });
return I != Ranges.end() && I->Low <= R.Low;
}
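llvm::lower_bound used above is a range-based wrapper over std::lower_bound, so the containment test can be replayed on plain structs. A minimal standalone sketch with invented sample ranges:

#include <algorithm>
#include <vector>

struct IntRange { long Low, High; };

static bool isInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
  // Find the first range whose High is >= R.High; it covers R exactly when
  // its Low is <= R.Low. Ranges must be sorted by High.
  auto I = std::lower_bound(
      Ranges.begin(), Ranges.end(), R,
      [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
  return I != Ranges.end() && I->Low <= R.Low;
}

int main() {
  std::vector<IntRange> Ranges = {{0, 9}, {20, 29}, {40, 99}};
  bool Covered = isInRanges({42, 50}, Ranges);    // inside {40, 99}
  bool NotCovered = isInRanges({10, 15}, Ranges); // falls between ranges
  return (Covered && !NotCovered) ? 0 : 1;
}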
@@ -78,6 +81,10 @@ namespace {
bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+
struct CaseRange {
ConstantInt* Low;
ConstantInt* High;
@@ -91,15 +98,18 @@ namespace {
using CaseItr = std::vector<CaseRange>::iterator;
private:
- void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
+ void processSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI);
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *Predecessor,
BasicBlock *OrigBlock, BasicBlock *Default,
const std::vector<IntRange> &UnreachableRanges);
- BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
- BasicBlock *Default);
+ BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
+ ConstantInt *LowerBound, ConstantInt *UpperBound,
+ BasicBlock *OrigBlock, BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
};
@@ -121,8 +131,12 @@ char LowerSwitch::ID = 0;
// Publicly exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
-INITIALIZE_PASS(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
@@ -130,6 +144,17 @@ FunctionPass *llvm::createLowerSwitchPass() {
}
bool LowerSwitch::runOnFunction(Function &F) {
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+ // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not
+ // preserve it and it becomes stale (when available) pretty much immediately.
+ // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI
+ // and computeKnownBits to refine isValidAssumeForContext's results. Given
+ // that the latter can handle some of the simple cases w/o a DominatorTree,
+ // it's easier to refrain from using the tree than to keep it up to date.
+ LVI->disableDT();
+
bool Changed = false;
SmallPtrSet<BasicBlock*, 8> DeleteList;
@@ -143,11 +168,12 @@ bool LowerSwitch::runOnFunction(Function &F) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
Changed = true;
- processSwitchInst(SI, DeleteList);
+ processSwitchInst(SI, DeleteList, AC, LVI);
}
}
for (BasicBlock* BB: DeleteList) {
+ LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
}
@@ -160,10 +186,11 @@ static raw_ostream &operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
- for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
- E = C.end(); B != E; ) {
- O << *B->Low << " -" << *B->High;
- if (++B != E) O << ", ";
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
+ B != E;) {
+ O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
+ if (++B != E)
+ O << ", ";
}
return O << "]";
@@ -179,8 +206,9 @@ static raw_ostream &operator<<(raw_ostream &O,
/// 2) Removed if subsequent incoming values now share the same case, i.e.,
/// multiple outcome edges are condensed into one. This is necessary to keep the
/// number of phi values equal to the number of branches to SuccBB.
-static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
- unsigned NumMergedCases) {
+static void
+fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
for (BasicBlock::iterator I = SuccBB->begin(),
IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
@@ -222,6 +250,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
BasicBlock *Predecessor, BasicBlock *OrigBlock,
BasicBlock *Default,
const std::vector<IntRange> &UnreachableRanges) {
+ assert(LowerBound && UpperBound && "Bounds must be initialized");
unsigned Size = End - Begin;
if (Size == 1) {
@@ -231,13 +260,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
// because the bounds already tell us so.
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
unsigned NumMergedCases = 0;
- if (LowerBound && UpperBound)
- NumMergedCases =
- UpperBound->getSExtValue() - LowerBound->getSExtValue();
+ NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
return Begin->BB;
}
- return newLeafBlock(*Begin, Val, OrigBlock, Default);
+ return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+ Default);
}
unsigned Mid = Size / 2;
@@ -247,8 +275,8 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
CaseRange &Pivot = *(Begin + Mid);
- LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -"
- << Pivot.High->getValue() << "\n");
+ LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", "
+ << Pivot.High->getValue() << "]\n");
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
@@ -270,14 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
NewUpperBound = LHS.back().High;
}
- LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) {
- dbgs() << LowerBound->getSExtValue();
- } else { dbgs() << "NONE"; } dbgs() << " - "
- << NewUpperBound->getSExtValue() << "\n";
- dbgs() << "RHS Bounds ==> ";
- dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) {
- dbgs() << UpperBound->getSExtValue() << "\n";
- } else { dbgs() << "NONE\n"; });
+ LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", "
+ << NewUpperBound->getSExtValue() << "]\n"
+ << "RHS Bounds ==> [" << NewLowerBound->getSExtValue()
+ << ", " << UpperBound->getSExtValue() << "]\n");
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
@@ -305,9 +329,11 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
/// switch's value == the case's value. If not, then it jumps to the default
/// branch. At this point in the tree, the value can't be another valid case
/// value, so the jump to the "default" branch is warranted.
-BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
- BasicBlock* OrigBlock,
- BasicBlock* Default) {
+BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
+ ConstantInt *LowerBound,
+ ConstantInt *UpperBound,
+ BasicBlock *OrigBlock,
+ BasicBlock *Default) {
Function* F = OrigBlock->getParent();
BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
@@ -320,10 +346,14 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
- if (Leaf.Low->isMinValue(true /*isSigned*/)) {
+ if (Leaf.Low == LowerBound) {
// Val >= Min && Val <= Hi --> Val <= Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
"SwitchLeaf");
+ } else if (Leaf.High == UpperBound) {
+ // Val <= Max && Val >= Lo --> Val >= Lo
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+ "SwitchLeaf");
} else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
@@ -363,14 +393,20 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
return NewLeaf;
}
-/// Transform simple list of Cases into list of CaseRange's.
+/// Transform the simple list of \p SI's cases into a list of CaseRanges, \p Cases.
+/// \post \p Cases does not contain references to \p SI's default BB.
+/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
- unsigned numCmps = 0;
+ unsigned NumSimpleCases = 0;
// Start with "simple" cases
- for (auto Case : SI->cases())
+ for (auto Case : SI->cases()) {
+ if (Case.getCaseSuccessor() == SI->getDefaultDest())
+ continue;
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
+ ++NumSimpleCases;
+ }
llvm::sort(Cases, CaseCmp());
@@ -396,60 +432,88 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.erase(std::next(I), Cases.end());
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
- }
-
- return numCmps;
+ return NumSimpleCases;
}
/// Replace the specified switch instruction with a sequence of chained if-then
/// insts in a balanced binary search.
void LowerSwitch::processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock*> &DeleteList) {
- BasicBlock *CurBlock = SI->getParent();
- BasicBlock *OrigBlock = CurBlock;
- Function *F = CurBlock->getParent();
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI) {
+ BasicBlock *OrigBlock = SI->getParent();
+ Function *F = OrigBlock->getParent();
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
// Don't handle unreachable blocks. If there are successors with phis, this
// would leave them behind with missing predecessors.
- if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
- CurBlock->getSinglePredecessor() == CurBlock) {
- DeleteList.insert(CurBlock);
+ if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
+ OrigBlock->getSinglePredecessor() == OrigBlock) {
+ DeleteList.insert(OrigBlock);
return;
}
+ // Prepare cases vector.
+ CaseVector Cases;
+ const unsigned NumSimpleCases = Clusterify(Cases, SI);
+ LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total non-default cases: " << NumSimpleCases
+ << "\nCase clusters: " << Cases << "\n");
+
// If there is only the default destination, just branch.
- if (!SI->getNumCases()) {
- BranchInst::Create(Default, CurBlock);
+ if (Cases.empty()) {
+ BranchInst::Create(Default, OrigBlock);
+ // Remove all but one of the references from Default's PHIs to OrigBlock.
+ fixPhis(Default, OrigBlock, OrigBlock);
SI->eraseFromParent();
return;
}
- // Prepare cases vector.
- CaseVector Cases;
- unsigned numCmps = Clusterify(Cases, SI);
- LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n");
- (void)numCmps;
-
ConstantInt *LowerBound = nullptr;
ConstantInt *UpperBound = nullptr;
- std::vector<IntRange> UnreachableRanges;
+ bool DefaultIsUnreachableFromSwitch = false;
if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
// Make the bounds tightly fitted around the case value range, because we
// know that the value passed to the switch must be exactly one of the case
// values.
- assert(!Cases.empty());
LowerBound = Cases.front().Low;
UpperBound = Cases.back().High;
+ DefaultIsUnreachableFromSwitch = true;
+ } else {
+ // Constraining the range of the value being switched over helps eliminate
+ // unreachable BBs and minimize the number of `add` instructions
+ // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after
+ // LowerSwitch isn't as good, and also much more expensive in terms of
+ // compile time for the following reasons:
+ // 1. it processes many kinds of instructions, not just switches;
+ // 2. even if limited to icmp instructions only, it will have to process
+ // roughly C icmp's per switch, where C is the number of cases in the
+ // switch, while LowerSwitch only needs to call LVI once per switch.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
+ // TODO Shouldn't this create a signed range?
+ ConstantRange KnownBitsRange =
+ ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
+ const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
+ ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
+ // We delegate removal of unreachable non-default cases to other passes. In
+ // the unlikely event that some of them survived, we just conservatively
+ // maintain the invariant that all the cases lie between the bounds. This
+ // may, however, still render the default case effectively unreachable.
+ APInt Low = Cases.front().Low->getValue();
+ APInt High = Cases.back().High->getValue();
+ APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low);
+ APInt Max = APIntOps::smax(ValRange.getSignedMax(), High);
+
+ LowerBound = ConstantInt::get(SI->getContext(), Min);
+ UpperBound = ConstantInt::get(SI->getContext(), Max);
+ DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max);
+ }
+
+ std::vector<IntRange> UnreachableRanges;
+ if (DefaultIsUnreachableFromSwitch) {
DenseMap<BasicBlock *, unsigned> Popularity;
unsigned MaxPop = 0;
BasicBlock *PopSucc = nullptr;
@@ -496,8 +560,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
#endif
// As the default block in the switch is unreachable, update the PHI nodes
- // (remove the entry to the default block) to reflect this.
- Default->removePredecessor(OrigBlock);
+ // (remove all of the references to the default block) to reflect this.
+ const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases;
+ for (unsigned I = 0; I < NumDefaultEdges; ++I)
+ Default->removePredecessor(OrigBlock);
// Use the most popular block as the new default, reducing the number of
// cases.
@@ -510,7 +576,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// If there are no cases left, just branch.
if (Cases.empty()) {
- BranchInst::Create(Default, CurBlock);
+ BranchInst::Create(Default, OrigBlock);
SI->eraseFromParent();
// As all the cases have been replaced with a single branch, only keep
// one entry in the PHI nodes.
@@ -518,12 +584,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
PopSucc->removePredecessor(OrigBlock);
return;
}
- }
- unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0;
- for (const auto &Case : SI->cases())
- if (Case.getCaseSuccessor() == Default)
- NrOfDefaults++;
+ // If the condition was a PHI node with the switch block as a predecessor,
+ // removing predecessors may have caused the condition to be erased.
+ // Getting the condition value again here protects against that.
+ Val = SI->getCondition();
+ }
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
@@ -537,14 +603,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// If there are entries in any PHI nodes for the default edge, make sure
// to update them as well.
- fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults);
+ fixPhis(Default, OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
BasicBlock *OldDefault = SI->getDefaultDest();
- CurBlock->getInstList().erase(SI);
+ OrigBlock->getInstList().erase(SI);
// If the Default block has no more predecessors just add it to DeleteList.
if (pred_begin(OldDefault) == pred_end(OldDefault))
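
The new bound logic above intersects the known-bits range with what LazyValueInfo reports, widens it just enough to cover the outermost case values, and then decides whether the default successor can still be reached. The fragment below is a minimal standalone C++ model of that final decision only, with made-up names and plain integers instead of APInt; it assumes the case values are distinct and sorted, as Clusterify guarantees.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Simplified model of the reachability check in processSwitchInst. CaseValues
// plays the role of the clustered case list (distinct, sorted); [ValMin, ValMax]
// is the range the analyses proved for the switched value.
static bool defaultIsUnreachable(const std::vector<int64_t> &CaseValues,
                                 int64_t ValMin, int64_t ValMax) {
  assert(!CaseValues.empty() &&
         std::is_sorted(CaseValues.begin(), CaseValues.end()));
  // Conservatively widen the bounds so every case lies inside them, mirroring
  // the smin/smax against Cases.front().Low and Cases.back().High.
  int64_t Min = std::min(ValMin, CaseValues.front());
  int64_t Max = std::max(ValMax, CaseValues.back());
  // The default can only be dead when the distinct cases tile [Min, Max]
  // exactly: e.g. range [0, 3] with cases {0, 1, 2, 3}, but not {0, 1, 3}.
  return Min + static_cast<int64_t>(CaseValues.size()) - 1 == Max;
}
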
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 23145e584751..cd2c81b6abc8 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -1,9 +1,8 @@
//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 88d595ee02ab..c0b7edc547fd 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -1,9 +1,8 @@
//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ae5e72ea4d30..c84beceee191 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -1,9 +1,8 @@
//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,44 +27,24 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- StructType *EltTy;
+ StructType *EltTy = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy());
if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
- ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
- StructType *OldEltTy = cast<StructType>(ATy->getElementType());
- // Upgrade a 2-field global array type to the new 3-field format if needed.
- if (Data && OldEltTy->getNumElements() < 3)
- EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy());
- else
- EltTy = OldEltTy;
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
- for (unsigned i = 0; i != n; ++i) {
- auto Ctor = cast<Constant>(Init->getOperand(i));
- if (EltTy != OldEltTy)
- Ctor =
- ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
- Ctor->getAggregateElement(1),
- Constant::getNullValue(IRB.getInt8PtrTy()));
- CurrentCtors.push_back(Ctor);
- }
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
}
GVCtor->eraseFromParent();
- } else {
- // Use the new three-field struct if there isn't one already.
- EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy());
}
- // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+ // Build a 3 field global_ctor entry. We don't take a comdat key.
Constant *CSVals[3];
CSVals[0] = IRB.getInt32(Priority);
CSVals[1] = F;
- // FIXME: Drop support for the two element form in LLVM 4.0.
- if (EltTy->getNumElements() >= 3)
- CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
- : Constant::getNullValue(IRB.getInt8PtrTy());
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+ : Constant::getNullValue(IRB.getInt8PtrTy());
Constant *RuntimeCtorInit =
ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
@@ -127,36 +106,24 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
appendToUsedList(M, "llvm.compiler.used", Values);
}
-Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
- if (isa<Function>(FuncOrBitcast))
- return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->print(errs());
- errs() << '\n';
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
- report_fatal_error(Err);
-}
-
-Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
- ArrayRef<Type *> InitArgTypes) {
+FunctionCallee
+llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes) {
assert(!InitName.empty() && "Expected init function name");
- Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ return M.getOrInsertFunction(
InitName,
FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
- AttributeList()));
- F->setLinkage(Function::ExternalLinkage);
- return F;
+ AttributeList());
}
-std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName) {
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
- Function *InitFunction =
+ FunctionCallee InitFunction =
declareSanitizerInitFunction(M, InitName, InitArgTypes);
Function *Ctor = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -165,20 +132,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
IRB.CreateCall(InitFunction, InitArgs);
if (!VersionCheckName.empty()) {
- Function *VersionCheckFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
- AttributeList()));
+ FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeList());
IRB.CreateCall(VersionCheckFunction, {});
}
return std::make_pair(Ctor, InitFunction);
}
-std::pair<Function *, Function *>
+std::pair<Function *, FunctionCallee>
llvm::getOrCreateSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
- function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
StringRef VersionCheckName) {
assert(!CtorName.empty() && "Expected ctor function name");
@@ -189,7 +155,8 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
- Function *Ctor, *InitFunction;
+ Function *Ctor;
+ FunctionCallee InitFunction;
std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
FunctionsCreatedCallback(Ctor, InitFunction);
@@ -208,9 +175,10 @@ Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
}
return F;
}
- Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- Name, AttributeList(), Type::getVoidTy(M.getContext())));
- F->setLinkage(Function::ExternalLinkage);
+ Function *F =
+ cast<Function>(M.getOrInsertFunction(Name, AttributeList(),
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
appendToGlobalCtors(M, F, 0);
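
appendToGlobalArray now always emits the three-field {i32 priority, ctor pointer, i8* data} entry, and the sanitizer helpers return a FunctionCallee rather than a bare Function*. A minimal usage sketch under those signatures follows; the ctor and init names and the ShadowBase argument are purely illustrative, not taken from any real sanitizer.

#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <tuple>

using namespace llvm;

// Sketch: declare a module ctor that calls a hypothetical __my_san_init(i8*)
// and register it in llvm.global_ctors via the helpers above.
static void registerHypotheticalSanitizerCtor(Module &M, Value *ShadowBase) {
  Function *Ctor;
  FunctionCallee Init;
  std::tie(Ctor, Init) = createSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"my_san.module_ctor", /*InitName=*/"__my_san_init",
      /*InitArgTypes=*/{Type::getInt8PtrTy(M.getContext())},
      /*InitArgs=*/{ShadowBase}, /*VersionCheckName=*/"");
  // Lower priorities run earlier; 0 keeps this ctor ahead of default-priority
  // static initializers.
  appendToGlobalCtors(M, Ctor, /*Priority=*/0);
}
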
diff --git a/lib/Transforms/Utils/NameAnonGlobals.cpp b/lib/Transforms/Utils/NameAnonGlobals.cpp
index 34dc1cccdd5b..ac8991e9d475 100644
--- a/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -1,9 +1,8 @@
//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 585ce6b4c118..bdf24d80bd17 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -1,9 +1,8 @@
//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------===//
//
@@ -474,7 +473,8 @@ void PredicateInfo::buildPredicateInfo() {
}
for (auto &Assume : AC.assumptions()) {
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
- processAssume(II, II->getParent(), OpsToRename);
+ if (DT.isReachableFromEntry(II->getParent()))
+ processAssume(II, II->getParent(), OpsToRename);
}
// Now rename all our operations.
renameUses(OpsToRename);
@@ -489,8 +489,10 @@ void PredicateInfo::buildPredicateInfo() {
// tricky (FIXME).
static Function *getCopyDeclaration(Module *M, Type *Ty) {
std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
- return cast<Function>(M->getOrInsertFunction(
- Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty)));
+ return cast<Function>(
+ M->getOrInsertFunction(Name,
+ getType(M->getContext(), Intrinsic::ssa_copy, Ty))
+ .getCallee());
}
// Given the renaming stack, make all the operands currently on the stack real
@@ -633,7 +635,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
// uses in the same instruction do not have a strict sort order
// currently and will be considered equal. We could get rid of the
// stable sort by creating one if we wanted.
- std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ llvm::stable_sort(OrderedUses, Compare);
SmallVector<ValueDFS, 8> RenameStack;
// For each use, sorted into dfs order, push values and replaces uses with
// top of stack, which will represent the reaching def.
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 91e4f4254b3e..d58e1ea574ef 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -1,9 +1,8 @@
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,7 +113,6 @@ struct AllocaInfo {
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
- Value *AllocaPointerVal;
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
void clear() {
@@ -123,7 +121,6 @@ struct AllocaInfo {
OnlyStore = nullptr;
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- AllocaPointerVal = nullptr;
DbgDeclares.clear();
}
@@ -141,14 +138,12 @@ struct AllocaInfo {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
DefiningBlocks.push_back(SI->getParent());
- AllocaPointerVal = SI->getOperand(0);
OnlyStore = SI;
} else {
LoadInst *LI = cast<LoadInst>(User);
// Otherwise it must be a load instruction, keep track of variable
// reads.
UsingBlocks.push_back(LI->getParent());
- AllocaPointerVal = LI;
}
if (OnlyUsedInOneBlock) {
@@ -254,11 +249,6 @@ struct PromoteMem2Reg {
/// to.
DenseMap<PHINode *, unsigned> PhiToAllocaMap;
- /// If we are updating an AliasSetTracker, then for each alloca that is of
- /// pointer type, we keep track of what to copyValue to the inserted PHI
- /// nodes here.
- std::vector<Value *> PointerAllocaValues;
-
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
@@ -367,10 +357,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *UserInst = cast<Instruction>(*UI++);
- if (!isa<LoadInst>(UserInst)) {
- assert(UserInst == OnlyStore && "Should only have load/stores");
+ if (UserInst == OnlyStore)
continue;
- }
LoadInst *LI = cast<LoadInst>(UserInst);
// Okay, if we have a load from the alloca, we want to replace it with the
@@ -390,8 +378,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
Info.UsingBlocks.push_back(StoreBB);
continue;
}
- } else if (LI->getParent() != StoreBB &&
- !DT.dominates(StoreBB, LI->getParent())) {
+ } else if (!DT.dominates(StoreBB, LI->getParent())) {
// If the load and store are in different blocks, use BB dominance to
// check their relationships. If the store doesn't dom the use, bail
// out.
@@ -429,14 +416,12 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
DII->eraseFromParent();
- LBI.deleteValue(DII);
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
LBI.deleteValue(Info.OnlyStore);
AI->eraseFromParent();
- LBI.deleteValue(AI);
return true;
}
@@ -488,11 +473,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
unsigned LoadIdx = LBI.getInstructionIndex(LI);
// Find the nearest store that has a lower index than this load.
- StoresByIndexTy::iterator I =
- std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::make_pair(LoadIdx,
- static_cast<StoreInst *>(nullptr)),
- less_first());
+ StoresByIndexTy::iterator I = llvm::lower_bound(
+ StoresByIndex,
+ std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
+ less_first());
if (I == StoresByIndex.begin()) {
if (StoresByIndex.empty())
// If there are no stores, the load takes the undef value.
@@ -535,13 +519,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
}
AI->eraseFromParent();
- LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
DII->eraseFromParent();
- LBI.deleteValue(DII);
- }
++NumLocalPromoted;
return true;
@@ -620,8 +601,8 @@ void PromoteMem2Reg::run() {
// dead phi nodes.
// Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks;
- DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+ SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
+ Info.DefiningBlocks.end());
// Determine which blocks the value is live in. These are blocks which lead
// to uses.
@@ -636,10 +617,9 @@ void PromoteMem2Reg::run() {
IDF.setDefiningBlocks(DefBlocks);
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
- if (PHIBlocks.size() > 1)
- llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -751,7 +731,7 @@ void PromoteMem2Reg::run() {
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
};
llvm::sort(Preds, CompareBBNumbers);
@@ -759,9 +739,8 @@ void PromoteMem2Reg::run() {
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
- SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
- Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
- CompareBBNumbers);
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
+ Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
@@ -825,14 +804,11 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
break;
}
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getOperand(0) != AI)
- continue;
-
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
// Okay, we found a load before a store to the alloca. It is actually
// live into this block.
- break;
- }
+ if (LI->getOperand(0) == AI)
+ break;
}
}
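
Most of this PromoteMemoryToRegister hunk is a mechanical switch from iterator-pair std::lower_bound/std::sort calls to LLVM's range-based wrappers; the search itself is unchanged. As a rough standalone illustration of what that query does in promoteSingleBlockAlloca, here is the same "nearest store before this load" lookup written with plain std types (FakeStore and the lambda stand in for StoreInst* and less_first).

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

struct FakeStore {}; // stand-in for StoreInst

// StoresByIndex is kept sorted by instruction index (the pair's first field).
using StoresByIndexTy = std::vector<std::pair<unsigned, FakeStore *>>;

// Return the store with the largest index strictly below LoadIdx, or nullptr
// if every store comes after the load (the "load reads undef" case).
static FakeStore *nearestEarlierStore(const StoresByIndexTy &StoresByIndex,
                                      unsigned LoadIdx) {
  auto I = std::lower_bound(
      StoresByIndex.begin(), StoresByIndex.end(),
      std::make_pair(LoadIdx, static_cast<FakeStore *>(nullptr)),
      [](const auto &A, const auto &B) { return A.first < B.first; });
  return I == StoresByIndex.begin() ? nullptr : std::prev(I)->second;
}
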
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9e5fb0e7172d..bffdd115d940 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -1,9 +1,8 @@
//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -350,8 +349,7 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSA.Initialize(SomeVal->getType(), BaseName);
}
-void LoadAndStorePromoter::
-run(const SmallVectorImpl<Instruction *> &Insts) const {
+void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
diff --git a/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index 397bac2940a4..917d5e0a1ef0 100644
--- a/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -1,9 +1,8 @@
//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp
index 8c23957ac43e..a1313c77ed77 100644
--- a/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/lib/Transforms/Utils/SanitizerStats.cpp
@@ -1,9 +1,8 @@
//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,8 +56,8 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
FunctionType *StatReportTy =
FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
- Constant *StatReport = M->getOrInsertFunction(
- "__sanitizer_stat_report", StatReportTy);
+ FunctionCallee StatReport =
+ M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
auto InitAddr = ConstantExpr::getGetElementPtr(
EmptyModuleStatsTy, ModuleStatsGV,
@@ -98,8 +97,8 @@ void SanitizerStatReport::finish() {
IRBuilder<> B(BB);
FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- Constant *StatInit = M->getOrInsertFunction(
- "__sanitizer_stat_init", StatInitTy);
+ FunctionCallee StatInit =
+ M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
B.CreateRetVoid();
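
Both call sites here pick up the FunctionCallee-returning overload of getOrInsertFunction, which carries the callee together with its FunctionType, so CreateCall no longer needs the pointee type dug out of a pointer. A small sketch of the pattern, with a hypothetical helper name and signature:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Sketch: declare (or reuse) a void(i8*) runtime hook and emit a call to it.
// "__example_report" is a made-up name.
static void emitReportCall(Module &M, IRBuilder<> &B, Value *Arg) {
  Type *Int8PtrTy = B.getInt8PtrTy();
  FunctionType *ReportTy =
      FunctionType::get(B.getVoidTy(), Int8PtrTy, /*isVarArg=*/false);
  FunctionCallee Report = M.getOrInsertFunction("__example_report", ReportTy);
  // CreateCall reads the function type straight from the FunctionCallee.
  B.CreateCall(Report, Arg);
}
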
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 03b73954321d..11651d040dc0 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1,9 +1,8 @@
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,8 +25,9 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -66,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -292,9 +293,13 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
/// will be the same as those coming in from ExistPred, an existing predecessor
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
- BasicBlock *ExistPred) {
+ BasicBlock *ExistPred,
+ MemorySSAUpdater *MSSAU = nullptr) {
for (PHINode &PN : Succ->phis())
PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
+ if (MSSAU)
+ if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
+ MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
}
/// Compute an abstract "cost" of speculating the given instruction,
@@ -670,7 +675,8 @@ private:
} // end anonymous namespace
-static void EraseTerminatorAndDCECond(Instruction *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI,
+ MemorySSAUpdater *MSSAU = nullptr) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -683,7 +689,7 @@ static void EraseTerminatorAndDCECond(Instruction *TI) {
TI->eraseFromParent();
if (Cond)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
}
/// Return true if the specified terminator checks
@@ -858,7 +864,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
return true;
}
- SwitchInst *SI = cast<SwitchInst>(TI);
+ SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
SmallPtrSet<Constant *, 16> DeadCases;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
@@ -867,30 +873,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- // Collect branch weights into a vector.
- SmallVector<uint32_t, 8> Weights;
- MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
- bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
- if (HasWeight)
- for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
- ++MD_i) {
- ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
- Weights.push_back(CI->getValue().getZExtValue());
- }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i->getCaseValue())) {
- if (HasWeight) {
- std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
- Weights.pop_back();
- }
i->getCaseSuccessor()->removePredecessor(TI->getParent());
- SI->removeCase(i);
+ SI.removeCase(i);
}
}
- if (HasWeight && Weights.size() >= 2)
- setBranchWeights(SI, Weights);
-
LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
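
The deleted weight-vector bookkeeping is what SwitchInstProfUpdateWrapper now handles: it tracks the per-case branch weights and keeps the !prof metadata consistent itself when cases are removed or added through it. A hedged usage sketch built from the calls visible in this patch (the helper name is made up):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch: drop every case that targets DeadBB, letting the wrapper keep the
// branch_weights metadata in step as cases disappear.
static void removeCasesTargeting(SwitchInst &Switch, BasicBlock *DeadBB) {
  SwitchInstProfUpdateWrapper SIW(Switch);
  for (auto I = SIW->case_begin(); I != SIW->case_end();) {
    if (I->getCaseSuccessor() != DeadBB) {
      ++I;
      continue;
    }
    DeadBB->removePredecessor(SIW->getParent());
    I = SIW.removeCase(I); // the wrapper drops this case's weight as well
  }
}
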
@@ -1266,8 +1255,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
while (isa<DbgInfoIntrinsic>(I2))
I2 = &*BB2_Itr++;
}
+ // FIXME: Can we define a safety predicate for CallBr?
if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
- (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
+ isa<CallBrInst>(I1))
return false;
BasicBlock *BIParent = BI->getParent();
@@ -1350,9 +1341,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
HoistTerminator:
// It may not be possible to hoist an invoke.
+ // FIXME: Can we define a safety predicate for CallBr?
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
return Changed;
+ // TODO: callbr hoisting currently disabled pending further study.
+ if (isa<CallBrInst>(I1))
+ return Changed;
+
for (BasicBlock *Succ : successors(BB1)) {
for (PHINode &PN : Succ->phis()) {
Value *BB1V = PN.getIncomingValueForBlock(BB1);
@@ -1432,9 +1428,10 @@ HoistTerminator:
static bool canSinkInstructions(
ArrayRef<Instruction *> Insts,
DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
- // Prune out obviously bad instructions to move. Any non-store instruction
- // must have exactly one use, and we check later that use is by a single,
- // common PHI instruction in the successor.
+ // Prune out obviously bad instructions to move. Each instruction must have
+ // exactly zero or one use, and we check later that use is by a single, common
+ // PHI instruction in the successor.
+ bool HasUse = !Insts.front()->user_empty();
for (auto *I : Insts) {
// These instructions may change or break semantics if moved.
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1444,13 +1441,14 @@ static bool canSinkInstructions(
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
- if (const auto *C = dyn_cast<CallInst>(I))
+ if (const auto *C = dyn_cast<CallBase>(I))
if (C->isInlineAsm())
return false;
- // Everything must have only one use too, apart from stores which
- // have no uses.
- if (!isa<StoreInst>(I) && !I->hasOneUse())
+ // Each instruction must have zero or one use.
+ if (HasUse && !I->hasOneUse())
+ return false;
+ if (!HasUse && !I->user_empty())
return false;
}
@@ -1459,11 +1457,11 @@ static bool canSinkInstructions(
if (!I->isSameOperationAs(I0))
return false;
- // All instructions in Insts are known to be the same opcode. If they aren't
- // stores, check the only user of each is a PHI or in the same block as the
- // instruction, because if a user is in the same block as an instruction
- // we're contemplating sinking, it must already be determined to be sinkable.
- if (!isa<StoreInst>(I0)) {
+ // All instructions in Insts are known to be the same opcode. If they have a
+ // use, check that the only user is a PHI or in the same block as the
+ // instruction, because if a user is in the same block as an instruction we're
+ // contemplating sinking, it must already be determined to be sinkable.
+ if (HasUse) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1507,7 +1505,7 @@ static bool canSinkInstructions(
// We can't create a PHI from this GEP.
return false;
// Don't create indirect calls! The called value is the final operand.
- if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) {
+ if (isa<CallBase>(I0) && OI == OE - 1) {
// FIXME: if the call was *already* indirect, we should do this.
return false;
}
@@ -1541,7 +1539,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// it is slightly over-aggressive - it gets confused by commutative instructions
// so double-check it here.
Instruction *I0 = Insts.front();
- if (!isa<StoreInst>(I0)) {
+ if (!I0->user_empty()) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
auto *U = cast<Instruction>(*I->user_begin());
@@ -1599,11 +1597,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->andIRFlags(I);
}
- if (!isa<StoreInst>(I0)) {
+ if (!I0->user_empty()) {
// canSinkLastInstruction checked that all instructions were used by
// one and only one PHI node. Find that now, RAUW it to our common
// instruction and nuke it.
- assert(I0->hasOneUse());
auto *PN = cast<PHINode>(*I0->user_begin());
PN->replaceAllUsesWith(I0);
PN->eraseFromParent();
@@ -2203,7 +2200,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
BasicBlock *EdgeBB =
BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
RealDest->getParent(), RealDest);
- BranchInst::Create(RealDest, EdgeBB);
+ BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
+ CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -2539,7 +2537,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+ unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB);
@@ -2594,7 +2593,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
- // each predecessor does not exceed a certain threshold.
+ // each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
@@ -2611,7 +2610,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// and Cond.
// Account for the cost of duplicating this instruction into each
- // predecessor.
+ // predecessor.
NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
@@ -2750,7 +2749,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
(SuccFalseWeight + SuccTrueWeight) +
PredTrueWeight * SuccFalseWeight);
}
- AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
@@ -2765,7 +2764,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
}
- AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(1, FalseDest);
}
if (NewWeights.size() == 2) {
@@ -2810,12 +2809,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
}
}
// Update PHI Node.
- PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
- MergedCond);
+ PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
}
+
+ // PBI is changed to branch to TrueDest below. Remove it from the
+ // potential phis of all other successors.
+ if (MSSAU)
+ MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
+
// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorAndDCECond(PBI);
+ EraseTerminatorAndDCECond(PBI, MSSAU);
PBI = New_PBI;
}
@@ -3430,7 +3434,7 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
KeepEdge2 = nullptr;
else
Succ->removePredecessor(OldTerm->getParent(),
- /*DontDeleteUselessPHIs=*/true);
+ /*KeepOneInputPHIs=*/true);
}
IRBuilder<> Builder(OldTerm);
@@ -3622,20 +3626,16 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
// the switch to the merge point on the compared value.
BasicBlock *NewBB =
BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
- GetBranchWeights(SI, Weights);
- if (Weights.size() == 1 + SI->getNumCases()) {
- // Split weight for default case to case for "Cst".
- Weights[0] = (Weights[0] + 1) >> 1;
- Weights.push_back(Weights[0]);
-
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- setBranchWeights(SI, MDWeights);
+ {
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ auto W0 = SIW.getSuccessorWeight(0);
+ SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
+ if (W0) {
+ NewW = ((uint64_t(*W0) + 1) >> 1);
+ SIW.setSuccessorWeight(0, *NewW);
}
+ SIW.addCase(Cst, NewBB, NewW);
}
- SI->addCase(Cst, NewBB);
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
@@ -4184,24 +4184,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
} else {
+ Value* Cond = BI->getCondition();
if (BI->getSuccessor(0) == BB) {
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
EraseTerminatorAndDCECond(BI);
} else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0));
EraseTerminatorAndDCECond(BI);
Changed = true;
}
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ SwitchInstProfUpdateWrapper SU(*SI);
+ for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
if (i->getCaseSuccessor() != BB) {
++i;
continue;
}
- BB->removePredecessor(SI->getParent());
- i = SI->removeCase(i);
- e = SI->case_end();
+ BB->removePredecessor(SU->getParent());
+ i = SU.removeCase(i);
+ e = SU->case_end();
Changed = true;
}
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
@@ -4435,33 +4439,20 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
return true;
}
- SmallVector<uint64_t, 8> Weights;
- bool HasWeight = HasBranchWeights(SI);
- if (HasWeight) {
- GetBranchWeights(SI, Weights);
- HasWeight = (Weights.size() == 1 + SI->getNumCases());
- }
+ if (DeadCases.empty())
+ return false;
- // Remove dead cases from the switch.
+ SwitchInstProfUpdateWrapper SIW(*SI);
for (ConstantInt *DeadCase : DeadCases) {
SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
assert(CaseI != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
- if (HasWeight) {
- std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
- Weights.pop_back();
- }
-
// Prune unused values from PHI nodes.
CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
- SI->removeCase(CaseI);
- }
- if (HasWeight && Weights.size() >= 2) {
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- setBranchWeights(SI, MDWeights);
+ SIW.removeCase(CaseI);
}
- return !DeadCases.empty();
+ return true;
}
/// If BB would be eligible for simplification by
@@ -5034,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable(
ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
- Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+ Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
GlobalVariable::PrivateLinkage, Initializer,
"switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
@@ -5091,7 +5082,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
Value *GEPIndices[] = {Builder.getInt32(0), Index};
Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
GEPIndices, "switch.gep");
- return Builder.CreateLoad(GEP, "switch.load");
+ return Builder.CreateLoad(
+ cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
+ "switch.load");
}
}
llvm_unreachable("Unknown lookup table kind!");
@@ -5425,7 +5418,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
SI->getDefaultDest()->removePredecessor(SI->getParent(),
- /*DontDeleteUselessPHIs=*/true);
+ /*KeepOneInputPHIs=*/true);
}
bool ReturnedEarly = false;
@@ -5533,25 +5526,23 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// Now we have signed numbers that have been shifted so that, given enough
// precision, there are no negative values. Since the rest of the transform
// is bitwise only, we switch now to an unsigned representation.
- uint64_t GCD = 0;
- for (auto &V : Values)
- GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
- // This transform can be done speculatively because it is so cheap - it results
- // in a single rotate operation being inserted. This can only happen if the
- // factor extracted is a power of 2.
- // FIXME: If the GCD is an odd number we can multiply by the multiplicative
- // inverse of GCD and then perform this transform.
+ // This transform can be done speculatively because it is so cheap - it
+ // results in a single rotate operation being inserted.
// FIXME: It's possible that optimizing a switch on powers of two might also
// be beneficial - flag values are often powers of two and we could use a CLZ
// as the key function.
- if (GCD <= 1 || !isPowerOf2_64(GCD))
- // No common divisor found or too expensive to compute key function.
- return false;
- unsigned Shift = Log2_64(GCD);
+ // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
+ // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
+ // less than 64.
+ unsigned Shift = 64;
for (auto &V : Values)
- V = (int64_t)((uint64_t)V >> Shift);
+ Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+ assert(Shift < 64);
+ if (Shift > 0)
+ for (auto &V : Values)
+ V = (int64_t)((uint64_t)V >> Shift);
if (!isSwitchDense(Values))
// Transform didn't create a dense switch.
@@ -5796,7 +5787,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();
return false;
}
@@ -5860,7 +5851,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
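
ReduceSwitchRange's old GCD computation only ever accepted power-of-two factors, so the new code goes straight for the number of trailing zero bits all case values share and shifts it out. A standalone C++ illustration of that step (the helper names are made up, the values are modeled as unsigned after rebasing; in-tree code uses countTrailingZeros from MathExtras.h):

#include <algorithm>
#include <cstdint>
#include <vector>

// Portable count of trailing zero bits; returns 64 for 0, matching the
// countTrailingZeros behaviour the in-tree comment relies on.
static unsigned ctz64(uint64_t V) {
  if (V == 0)
    return 64;
  unsigned N = 0;
  for (; (V & 1) == 0; V >>= 1)
    ++N;
  return N;
}

// Shift every value right by the number of trailing zero bits they all share.
// With at least two distinct values present the shared count is < 64, so the
// shift below is always well defined.
static unsigned shiftOutCommonTrailingZeros(std::vector<uint64_t> &Values) {
  unsigned Shift = 64;
  for (uint64_t V : Values)
    Shift = std::min(Shift, ctz64(V));
  if (Shift > 0 && Shift < 64)
    for (uint64_t &V : Values)
      V >>= Shift;
  return Shift;
}

// Example: {0, 8, 16, 24} shares three trailing zero bits, so Shift == 3 and
// the values become the dense set {0, 1, 2, 3}.
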
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 7faf291e73d9..cbb114f9a47a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1,9 +1,8 @@
//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,7 +80,8 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
- bool eliminateOverflowIntrinsic(CallInst *CI);
+ bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
+ bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
bool eliminateTrunc(TruncInst *TI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
@@ -401,61 +402,29 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
replaceSRemWithURem(Rem);
}
-bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
- auto *F = CI->getCalledFunction();
- if (!F)
- return false;
-
- typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
- const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
- typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
- const SCEV *, Type *, unsigned);
-
- OperationFunctionTy Operation;
- ExtensionFunctionTy Extension;
-
- Instruction::BinaryOps RawOp;
-
- // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
- // have nuw.
- bool NoSignedOverflow;
-
- switch (F->getIntrinsicID()) {
+static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
+ bool Signed, const SCEV *LHS, const SCEV *RHS) {
+ const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags, unsigned);
+ switch (BinOp) {
default:
- return false;
-
- case Intrinsic::sadd_with_overflow:
- Operation = &ScalarEvolution::getAddExpr;
- Extension = &ScalarEvolution::getSignExtendExpr;
- RawOp = Instruction::Add;
- NoSignedOverflow = true;
- break;
-
- case Intrinsic::uadd_with_overflow:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
Operation = &ScalarEvolution::getAddExpr;
- Extension = &ScalarEvolution::getZeroExtendExpr;
- RawOp = Instruction::Add;
- NoSignedOverflow = false;
break;
-
- case Intrinsic::ssub_with_overflow:
+ case Instruction::Sub:
Operation = &ScalarEvolution::getMinusSCEV;
- Extension = &ScalarEvolution::getSignExtendExpr;
- RawOp = Instruction::Sub;
- NoSignedOverflow = true;
break;
-
- case Intrinsic::usub_with_overflow:
- Operation = &ScalarEvolution::getMinusSCEV;
- Extension = &ScalarEvolution::getZeroExtendExpr;
- RawOp = Instruction::Sub;
- NoSignedOverflow = false;
+ case Instruction::Mul:
+ Operation = &ScalarEvolution::getMulExpr;
break;
}
- const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
- const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+ const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
+ Signed ? &ScalarEvolution::getSignExtendExpr
+ : &ScalarEvolution::getZeroExtendExpr;
+ // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
auto *NarrowTy = cast<IntegerType>(LHS->getType());
auto *WideTy =
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
@@ -466,27 +435,32 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
const SCEV *B =
(SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
(SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
+ return A == B;
+}
- if (A != B)
+bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
+ const SCEV *LHS = SE->getSCEV(WO->getLHS());
+ const SCEV *RHS = SE->getSCEV(WO->getRHS());
+ if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
return false;
// Proved no overflow, nuke the overflow check and, if possible, the overflow
// intrinsic as well.
BinaryOperator *NewResult = BinaryOperator::Create(
- RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+ WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
- if (NoSignedOverflow)
+ if (WO->isSigned())
NewResult->setHasNoSignedWrap(true);
else
NewResult->setHasNoUnsignedWrap(true);
SmallVector<ExtractValueInst *, 4> ToDelete;
- for (auto *U : CI->users()) {
+ for (auto *U : WO->users()) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
if (EVI->getIndices()[0] == 1)
- EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
else {
assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
EVI->replaceAllUsesWith(NewResult);
@@ -498,9 +472,28 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
for (auto *EVI : ToDelete)
EVI->eraseFromParent();
- if (CI->use_empty())
- CI->eraseFromParent();
+ if (WO->use_empty())
+ WO->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
+ const SCEV *LHS = SE->getSCEV(SI->getLHS());
+ const SCEV *RHS = SE->getSCEV(SI->getRHS());
+ if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
+ return false;
+
+ BinaryOperator *BO = BinaryOperator::Create(
+ SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ if (SI->isSigned())
+ BO->setHasNoSignedWrap();
+ else
+ BO->setHasNoUnsignedWrap();
+ SI->replaceAllUsesWith(BO);
+ DeadInsts.emplace_back(SI);
+ Changed = true;
return true;
}
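
eliminateOverflowIntrinsic and the new eliminateSaturatingIntrinsic both lean on willNotOverflow, which asks ScalarEvolution whether ext(LHS op RHS) equals ext(LHS) op ext(RHS) in a type twice as wide. The same equivalence on concrete integers, as a tiny standalone C++ check for signed 8-bit addition (purely illustrative, not the LLVM API):

#include <cstdint>
#include <limits>

// willNotOverflow asks, in SCEV terms, whether ext(A op B) == ext(A) op ext(B).
// On concrete 8-bit integers that is the same as asking whether the result,
// computed in 16 bits, still fits in the narrow type.
static bool signedAddFits8(int8_t A, int8_t B) {
  int16_t Wide = static_cast<int16_t>(A) + static_cast<int16_t>(B); // ext(A) + ext(B)
  return Wide >= std::numeric_limits<int8_t>::min() &&
         Wide <= std::numeric_limits<int8_t>::max();
}

// signedAddFits8(100, 27) -> true  (127 is representable in i8)
// signedAddFits8(100, 28) -> false (128 is not; the narrow add would wrap)
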
@@ -548,20 +541,19 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
if (isa<Instruction>(U) &&
!DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
continue;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
- if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) {
- assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
- // If we cannot get rid of trunc, bail.
- if (ICI->isSigned() && !DoesSExtCollapse)
- return false;
- if (ICI->isUnsigned() && !DoesZExtCollapse)
- return false;
- // For equality, either signed or unsigned works.
- ICmpUsers.push_back(ICI);
- } else
- return false;
- } else
+ ICmpInst *ICI = dyn_cast<ICmpInst>(U);
+ if (!ICI) return false;
+ assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+ if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
+ !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
return false;
+ // If we cannot get rid of trunc, bail.
+ if (ICI->isSigned() && !DoesSExtCollapse)
+ return false;
+ if (ICI->isUnsigned() && !DoesZExtCollapse)
+ return false;
+ // For equality, either signed or unsigned works.
+ ICmpUsers.push_back(ICI);
}
auto CanUseZExt = [&](ICmpInst *ICI) {
@@ -584,7 +576,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
};
// Replace all comparisons against trunc with comparisons against IV.
for (auto *ICI : ICmpUsers) {
- auto *Op1 = ICI->getOperand(1);
+ bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
+ auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
Instruction *Ext = nullptr;
// For signed/unsigned predicate, replace the old comparison with comparison
// of immediate IV against sext/zext of the invariant argument. If we can
@@ -593,6 +586,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
// TODO: If we see a signed comparison which can be turned into unsigned,
// we can do it here for canonicalization purposes.
ICmpInst::Predicate Pred = ICI->getPredicate();
+ if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
if (CanUseZExt(ICI)) {
assert(DoesZExtCollapse && "Unprofitable zext?");
Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
@@ -636,8 +630,12 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return eliminateSDiv(Bin);
}
- if (auto *CI = dyn_cast<CallInst>(UseInst))
- if (eliminateOverflowIntrinsic(CI))
+ if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
+ if (eliminateOverflowIntrinsic(WO))
+ return true;
+
+ if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
+ if (eliminateSaturatingIntrinsic(SI))
return true;
if (auto *TI = dyn_cast<TruncInst>(UseInst))
@@ -730,59 +728,31 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
-
// Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
return false;
- const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
- SCEV::NoWrapFlags, unsigned);
- switch (BO->getOpcode()) {
- default:
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Sub &&
+ BO->getOpcode() != Instruction::Mul)
return false;
- case Instruction::Add:
- GetExprForBO = &ScalarEvolution::getAddExpr;
- break;
-
- case Instruction::Sub:
- GetExprForBO = &ScalarEvolution::getMinusSCEV;
- break;
-
- case Instruction::Mul:
- GetExprForBO = &ScalarEvolution::getMulExpr;
- break;
- }
-
- unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
- Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
-
bool Changed = false;
- if (!BO->hasNoUnsignedWrap()) {
- const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
- const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
- SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap, 0u);
- if (ExtendAfterOp == OpAfterExtend) {
- BO->setHasNoUnsignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
+ if (!BO->hasNoUnsignedWrap() &&
+ willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
- if (!BO->hasNoSignedWrap()) {
- const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
- const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
- SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap, 0u);
- if (ExtendAfterOp == OpAfterExtend) {
- BO->setHasNoSignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
+ if (!BO->hasNoSignedWrap() &&
+ willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
return Changed;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1bb26caa2af2..e0def81d5eee 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1,9 +1,8 @@
//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,8 +16,10 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
using namespace PatternMatch;
@@ -105,6 +107,12 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
});
}
+static bool callHasFP128Argument(const CallInst *CI) {
+ return any_of(CI->operands(), [](const Use &OI) {
+ return OI->getType()->isFP128Ty();
+ });
+}
+
static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
if (Base < 2 || Base > 36)
// handle special zero base
@@ -334,11 +342,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
return ConstantInt::get(CI->getType(), Str1.compare(Str2));
if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(
- B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
@@ -398,11 +407,12 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
}
if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(
- B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
@@ -591,7 +601,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
if (isOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
+ CI->getType());
return nullptr;
}
@@ -735,7 +746,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
Value *Result = castToCStr(CI->getArgOperand(0), B);
- Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ Result =
+ B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
@@ -773,7 +785,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
// It would be really nice to reuse switch lowering here but we can't change
// the CFG at this point.
//
- // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
+ // != 0
// after bounds check.
if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
unsigned char Max =
@@ -828,27 +841,20 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
}
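
The reworded comment above describes the trick used for a short constant haystack: membership of C is tested with one shift against a precomputed bit mask, guarded by a bounds check. Spelled out in ordinary C++ as an illustration (is_crlf is a made-up name; the pass emits equivalent IR, sizing the mask from the largest string element rather than a fixed 64 bits):

    #include <cstdint>
    #include <cstdio>

    // Membership test for the constant haystack "\r\n": one shift against a
    // precomputed mask, after a bounds check that keeps the shift in range.
    static bool is_crlf(unsigned char c) {
      const uint64_t mask = (1ull << '\r') | (1ull << '\n');
      return c < 64 && ((1ull << c) & mask) != 0;
    }

    int main() {
      std::printf("%d %d\n", is_crlf('\n'), is_crlf('x')); // prints "1 0"
      return 0;
    }
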
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
-
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC)
- return nullptr;
-
- uint64_t Len = LenC->getZExtValue();
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
+ uint64_t Len, IRBuilder<> &B,
+ const DataLayout &DL) {
if (Len == 0) // memcmp(s1,s2,0) -> 0
return Constant::getNullValue(CI->getType());
// memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
if (Len == 1) {
- Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
+ Value *LHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
return B.CreateSub(LHSV, RHSV, "chardiff");
}
@@ -878,12 +884,12 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
if (!LHSV) {
Type *LHSPtrTy =
IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
- LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+ LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
}
if (!RHSV) {
Type *RHSPtrTy =
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+ RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
}
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
@@ -907,10 +913,48 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
+ return nullptr;
+}
+
+// Most simplifications for memcmp also apply to bcmp.
+Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
+ IRBuilder<> &B) {
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Handle constant lengths.
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+ if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
+ LenC->getZExtValue(), B, DL))
+ return Res;
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+ if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+ return V;
+
+ // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
+ // `bcmp` can be more efficient than memcmp because it only has to know that
+ // there is a difference, not where it is.
+ if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+ Value *LHS = CI->getArgOperand(0);
+ Value *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+ return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+ }
return nullptr;
}
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
+ return optimizeMemCmpBCmpCommon(CI, B);
+}
+
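+
The memcmp/bcmp split above rests on the observation quoted in the code: a caller that only compares the result against zero never uses the ordering, so the weaker bcmp contract is enough. A user-level illustration (bcmp comes from <strings.h> on POSIX-ish C libraries; its availability is exactly what the TLI->has(LibFunc_bcmp) guard checks):

    #include <cstdio>
    #include <cstring>
    #include <strings.h> // bcmp on POSIX-ish C libraries

    int main() {
      const char a[] = "abcdef", b[] = "abcxef";
      // Only the equal/not-equal answer is consumed, so the ordering that
      // memcmp computes is wasted; bcmp's weaker contract is sufficient.
      bool viaMemcmp = std::memcmp(a, b, sizeof a) == 0;
      bool viaBcmp   = bcmp(a, b, sizeof a) == 0;
      std::printf("%d %d\n", viaMemcmp, viaBcmp); // prints "0 0"
      return 0;
    }
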
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
@@ -1031,7 +1075,8 @@ static Value *valueHasFloatPrecision(Value *Val) {
/// Shrink double -> float functions.
static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
bool isBinary, bool isPrecise = false) {
- if (!CI->getType()->isDoubleTy())
+ Function *CalleeFn = CI->getCalledFunction();
+ if (!CI->getType()->isDoubleTy() || !CalleeFn)
return nullptr;
// If not all the uses of the function are converted to float, then bail out.
@@ -1051,15 +1096,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
if (!V[0] || (isBinary && !V[1]))
return nullptr;
+ StringRef CalleeNm = CalleeFn->getName();
+ AttributeList CalleeAt = CalleeFn->getAttributes();
+ bool CalleeIn = CalleeFn->isIntrinsic();
+
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call, otherwise the result is an
// infinite loop. For example, from MinGW-w64:
//
// float expf(float val) { return (float) exp((double) val); }
- Function *CalleeFn = CI->getCalledFunction();
- StringRef CalleeNm = CalleeFn->getName();
- AttributeList CalleeAt = CalleeFn->getAttributes();
- if (CalleeFn && !CalleeFn->isIntrinsic()) {
+ if (!CalleeIn) {
const Function *Fn = CI->getFunction();
StringRef FnName = Fn->getName();
if (FnName.back() == 'f' &&
@@ -1074,7 +1120,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
// g((double) float) -> (double) gf(float)
Value *R;
- if (CalleeFn->isIntrinsic()) {
+ if (CalleeIn) {
Module *M = CI->getModule();
Intrinsic::ID IID = CalleeFn->getIntrinsicID();
Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
@@ -1132,10 +1178,10 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
IRBuilder<> &B) {
if (!isa<FPMathOperator>(Call))
return nullptr;
-
+
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Call->getFastMathFlags());
-
+
// TODO: Can this be shared to also handle LLVM intrinsics?
Value *X;
switch (Func) {
@@ -1189,7 +1235,8 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
}
/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
+/// exp2(log2(n) * x) for pow(n, x).
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1276,12 +1323,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
- bool IsInteger = BaseF->isInteger(),
- IsReciprocal = BaseR.isInteger();
+ bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
APSInt NI(64, false);
if ((IsInteger || IsReciprocal) &&
- !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+ NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK &&
NI > 1 && NI.isPowerOf2()) {
double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1301,6 +1348,28 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
LibFunc_exp10l, B, Attrs);
+ // pow(n, x) -> exp2(log2(n) * x)
+ if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() &&
+ Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) {
+ Value *Log = nullptr;
+ if (Ty->isFloatTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
+ else if (Ty->isDoubleTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
+
+ if (Log) {
+ Value *FMul = B.CreateFMul(Log, Expo, "mul");
+ if (Pow->doesNotAccessMemory()) {
+ return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2");
+ } else {
+ if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l))
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
+ }
+ }
+ }
return nullptr;
}
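
The new pow(n, x) case is the identity n**x == exp2(log2(n) * x), restricted by the guards above to one-use calls with approx/no-NaN/no-Inf fast-math flags and a normal, non-negative base. A quick numeric check of the identity (plain C++, not the transform itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      const double n = 6.0, x = 1.7;
      double direct  = std::pow(n, x);
      double rewrite = std::exp2(std::log2(n) * x);
      // The two agree to within rounding error, which is why the rewrite is
      // only legal under the approximate-math flags checked above.
      std::printf("%.17g\n%.17g\n", direct, rewrite);
      return 0;
    }
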
@@ -1364,12 +1433,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
return Sqrt;
}
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+ IRBuilder<> &B) {
+ Value *Args[] = {Base, Expo};
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+ return B.CreateCall(F, Args);
+}
+
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
- Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Value *Base = Pow->getArgOperand(0);
+ Value *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
StringRef Name = Callee->getName();
Type *Ty = Pow->getType();
+ Module *M = Pow->getModule();
Value *Shrunk = nullptr;
+ bool AllowApprox = Pow->hasApproxFunc();
bool Ignored;
// Bail out if simplifying libcalls to pow() is disabled.
@@ -1382,8 +1461,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// Shrink pow() to powf() if the arguments are single precision,
// unless the result is expected to be double precision.
- if (UnsafeFPShrink &&
- Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+ hasFloatVersion(Name))
Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
// Evaluate special cases related to the base.
@@ -1403,7 +1482,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// pow(x, 0.0) -> 1.0
if (match(Expo, m_SpecificFP(0.0)))
- return ConstantFP::get(Ty, 1.0);
+ return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
if (match(Expo, m_FPOne()))
@@ -1418,7 +1497,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// pow(x, n) -> x * x * x * ...
const APFloat *ExpoF;
- if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
// If the exponent is an integer+0.5 we generate a call to sqrt and an
// additional fmul.
@@ -1442,9 +1521,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (!Expo2.isInteger())
return nullptr;
- Sqrt =
- getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
- Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+ Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), M, B, TLI);
}
// We will memoize intermediate products of the Addition Chain.
@@ -1467,6 +1545,29 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
return FMul;
}
+
+ APSInt IntExpo(32, /*isUnsigned=*/false);
+ // powf(x, n) -> powi(x, n) if n is a constant signed integer value
+ if (ExpoF->isInteger() &&
+ ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK) {
+ return createPowWithIntegerExponent(
+ Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+ }
+ }
+
+ // powf(x, itofp(y)) -> powi(x, y)
+ if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+ Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
+ Value *NewExpo = nullptr;
+ unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
+ if (isa<SIToFPInst>(Expo) && BitWidth == 32)
+ NewExpo = IntExpo;
+ else if (BitWidth < 32)
+ NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
+ : B.CreateZExt(IntExpo, B.getInt32Ty());
+ if (NewExpo)
+ return createPowWithIntegerExponent(Base, NewExpo, M, B);
}
return Shrunk;
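
For constant exponents this routine keeps two strategies: an integer-plus-0.5 exponent becomes a sqrt plus a short multiply chain (at most 7 multiplications, per the comment above), and an exponent that is exactly a 32-bit signed integer now becomes an llvm.powi call via createPowWithIntegerExponent. The first rewrite, written out numerically as a sanity check (illustration only, not the pass's code):

    #include <cmath>
    #include <cstdio>

    int main() {
      const double x = 1.37;
      double viaPow  = std::pow(x, 3.5);
      double viaMuls = std::sqrt(x) * x * x * x; // one sqrt + short multiply chain
      std::printf("%.17g\n%.17g\n", viaPow, viaMuls);
      return 0;
    }
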
@@ -1504,9 +1605,8 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
One = ConstantExpr::getFPExtend(One, Op->getType());
Module *M = CI->getModule();
- Value *NewCallee =
- M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
- Op->getType(), B.getInt32Ty());
+ FunctionCallee NewCallee = M->getOrInsertFunction(
+ TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1518,40 +1618,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
// function, do that first.
+ Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
+ // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+ // the intrinsics for improved optimization (for example, vectorization).
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+ // From the C standard draft WG14/N1256:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- if (CI->isFast()) {
- // If the call is 'fast', then anything we create here will also be 'fast'.
- FMF.setFast();
- } else {
- // At a minimum, no-nans-fp-math must be true.
- if (!CI->hasNoNaNs())
- return nullptr;
- // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
- // "Ideally, fmax would be sensitive to the sign of zero, for example
- // fmax(-0.0, +0.0) would return +0; however, implementation in software
- // might be impractical."
- FMF.setNoSignedZeros();
- FMF.setNoNaNs();
- }
+ FastMathFlags FMF = CI->getFastMathFlags();
+ FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
- // We have a relaxed floating-point environment. We can ignore NaN-handling
- // and transform to a compare and select. We do not have to consider errno or
- // exceptions, because fmin/fmax do not have those.
- Value *Op0 = CI->getArgOperand(0);
- Value *Op1 = CI->getArgOperand(1);
- Value *Cmp = Callee->getName().startswith("fmin") ?
- B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
- return B.CreateSelect(Cmp, Op0, Op1);
+ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
+ Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+ return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
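
The simplification above now canonicalizes fmin/fmax to the llvm.minnum/llvm.maxnum intrinsics instead of a compare-and-select, because their NaN handling already matches the libm functions; only the nsz flag is added, as the quoted C-standard note explains. The library-level behaviour being relied on, shown in plain C++ (not the transform itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      double qnan = std::nan("");
      // Like llvm.minnum/llvm.maxnum, fmin/fmax return the non-NaN operand
      // when exactly one input is NaN; the sign of zero is not guaranteed,
      // which is why only the nsz flag needs to be set.
      std::printf("%g %g\n", std::fmin(qnan, 2.0), std::fmax(1.0, qnan)); // 2 1
      return 0;
    }
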
Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
@@ -1654,13 +1744,13 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
// replace it with the fabs of that factor.
Module *M = Callee->getParent();
Type *ArgType = I->getType();
- Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
if (OtherOp) {
// If we found a non-repeated factor, we still need to get its square
// root. We then multiply that by the value that was simplified out
// of the square root calculation.
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
return B.CreateFMul(FabsCall, SqrtCall);
}
@@ -1728,8 +1818,8 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
}
Module *M = OrigCallee->getParent();
- Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy);
+ FunctionCallee Callee =
+ M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
@@ -1840,8 +1930,8 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::cttz, ArgType);
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1854,8 +1944,8 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
// fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::ctlz, ArgType);
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::ctlz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
V);
@@ -2026,13 +2116,27 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
// arguments.
if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *IPrintFFn =
+ FunctionCallee IPrintFFn =
M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(IPrintFFn);
B.Insert(New);
return New;
}
+
+ // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
+ // arguments.
+ if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2077,7 +2181,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
}
if (FormatStr[1] == 's') {
- // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
+ // strlen(str)+1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -2105,13 +2210,27 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
// point arguments.
if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *SIPrintFFn =
+ FunctionCallee SIPrintFFn =
M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SIPrintFFn);
B.Insert(New);
return New;
}
+
+ // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
+ // floating point arguments.
+ if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallSPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallSPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2140,7 +2259,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
else if (N < FormatStr.size() + 1)
return nullptr;
- // sprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt,
+ // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
// strlen(fmt)+1)
B.CreateMemCpy(
CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
@@ -2262,13 +2381,27 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
// floating point arguments.
if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *FIPrintFFn =
+ FunctionCallee FIPrintFFn =
M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(FIPrintFFn);
B.Insert(New);
return New;
}
+
+ // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
+ // 128-bit floating point arguments.
+ if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallFPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallFPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2288,7 +2421,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// If this is writing one byte, turn it into fputc.
// This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+ Value *Char = B.CreateLoad(B.getInt8Ty(),
+ castToCStr(CI->getArgOperand(0), B), "char");
Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -2307,7 +2441,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Don't rewrite fputs to fwrite when optimising for size because fwrite
// requires more arguments and thus extra MOVs are required.
- if (CI->getFunction()->optForSize())
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (OptForSize)
return nullptr;
// Check if has any use
@@ -2320,7 +2456,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
- // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ // fputs(s,F) --> fwrite(s,strlen(s),1,F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len)
return nullptr;
@@ -2367,18 +2503,14 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
- // Check for a constant string.
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ if (!CI->use_empty())
return nullptr;
- if (Str.empty() && CI->use_empty()) {
- // puts("") -> putchar('\n')
- Value *Res = emitPutChar(B.getInt32('\n'), B, TLI);
- if (CI->use_empty() || !Res)
- return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ // Check for a constant string.
+ // puts("") -> putchar('\n')
+ StringRef Str;
+ if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
+ return emitPutChar(B.getInt32('\n'), B, TLI);
return nullptr;
}
@@ -2441,6 +2573,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrStr(CI, Builder);
case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
+ case LibFunc_bcmp:
+ return optimizeBCmp(CI, Builder);
case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc_memcpy:
@@ -2686,9 +2820,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer,
function_ref<void(Instruction *)> Eraser)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
@@ -2735,12 +2870,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) {
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
-bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
- unsigned ObjSizeOp,
- unsigned SizeOp,
- bool isString) {
- if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+bool
+FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+ unsigned ObjSizeOp,
+ Optional<unsigned> SizeOp,
+ Optional<unsigned> StrOp,
+ Optional<unsigned> FlagOp) {
+ // If this function takes a flag argument, the implementation may use it to
+ // perform extra checks. Don't fold into the non-checking variant.
+ if (FlagOp) {
+ ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
+ if (!Flag || !Flag->isZero())
+ return false;
+ }
+
+ if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
return true;
+
if (ConstantInt *ObjSizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
if (ObjSizeCI->isMinusOne())
@@ -2748,23 +2894,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
// If the object size wasn't -1 (unknown), bail out if we were asked to.
if (OnlyLowerUnknownSize)
return false;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+ if (StrOp) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
// If the length is 0 we don't know how long it is and so we can't
// remove the check.
if (Len == 0)
return false;
return ObjSizeCI->getZExtValue() >= Len;
}
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
- return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+
+ if (SizeOp) {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
+ return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+ }
}
return false;
}
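
The generalized isFortifiedCallFoldable above folds a _chk call when the runtime check is provably redundant: the object size is unknown (-1), it is at least the size argument, or it covers the known string length; the new FlagOp parameter additionally requires the printf-family flag argument to be zero. A user-level picture of the simplest case, using the GCC/Clang checking builtins (names outside this patch, shown only as an analogy):

    #include <cstdio>
    #include <cstring>

    int main() {
      char dst[16];
      const char src[] = "hello";
      // ObjSizeOp corresponds to __builtin_object_size(dst, 0) here. The copy
      // length (6) is provably <= the destination size (16), so the checking
      // variant can be folded to a plain memcpy with no runtime failure path.
      __builtin___memcpy_chk(dst, src, sizeof src, __builtin_object_size(dst, 0));
      std::puts(dst);
      return 0;
    }
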
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
CI->getArgOperand(2));
return CI->getArgOperand(0);
@@ -2774,7 +2924,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
CI->getArgOperand(2));
return CI->getArgOperand(0);
@@ -2786,7 +2936,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
IRBuilder<> &B) {
// TODO: Try foldMallocMemset() here.
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
@@ -2797,8 +2947,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc Func) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
const DataLayout &DL = CI->getModule()->getDataLayout();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
*ObjSize = CI->getArgOperand(2);
@@ -2814,8 +2962,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true))
- return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+ if (isFortifiedCallFoldable(CI, 2, None, 1)) {
+ if (Func == LibFunc_strcpy_chk)
+ return emitStrCpy(Dst, Src, B, TLI);
+ else
+ return emitStpCpy(Dst, Src, B, TLI);
+ }
if (OnlyLowerUnknownSize)
return nullptr;
@@ -2838,13 +2990,99 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc Func) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
- return Ret;
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ if (Func == LibFunc_strncpy_chk)
+ return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+ else
+ return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
}
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 4, 3))
+ return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
+ SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+ return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), VariadicArgs, B, TLI);
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
+ SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+ return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
+ B, TLI);
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2))
+ return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
+ return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2, None, None, 1))
+ return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ CI->getArgOperand(4), B, TLI);
+
return nullptr;
}
@@ -2892,6 +3130,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc_stpncpy_chk:
case LibFunc_strncpy_chk:
return optimizeStrpNCpyChk(CI, Builder, Func);
+ case LibFunc_memccpy_chk:
+ return optimizeMemCCpyChk(CI, Builder);
+ case LibFunc_snprintf_chk:
+ return optimizeSNPrintfChk(CI, Builder);
+ case LibFunc_sprintf_chk:
+ return optimizeSPrintfChk(CI, Builder);
+ case LibFunc_strcat_chk:
+ return optimizeStrCatChk(CI, Builder);
+ case LibFunc_strlcat_chk:
+ return optimizeStrLCat(CI, Builder);
+ case LibFunc_strncat_chk:
+ return optimizeStrNCatChk(CI, Builder);
+ case LibFunc_strlcpy_chk:
+ return optimizeStrLCpyChk(CI, Builder);
+ case LibFunc_vsnprintf_chk:
+ return optimizeVSNPrintfChk(CI, Builder);
+ case LibFunc_vsprintf_chk:
+ return optimizeVSPrintfChk(CI, Builder);
default:
break;
}
diff --git a/lib/Transforms/Utils/SizeOpts.cpp b/lib/Transforms/Utils/SizeOpts.cpp
new file mode 100644
index 000000000000..1519751197d2
--- /dev/null
+++ b/lib/Transforms/Utils/SizeOpts.cpp
@@ -0,0 +1,37 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+using namespace llvm;
+
+static cl::opt<bool> ProfileGuidedSizeOpt(
+ "pgso", cl::Hidden, cl::init(true),
+ cl::desc("Enable the profile guided size optimization. "));
+
+bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(F);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+}
+
+bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(BB);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+}
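
These helpers gate size optimizations on profile data: a function or block only counts as cold (and hence worth shrinking) when a profile summary is available, the block/function is cold in it, and the -pgso flag is on. A hedged sketch of how a caller combines this with the static optsize attribute, mirroring the optimizeFPuts change earlier in this patch (preferSmallCode is an illustrative wrapper, not part of the patch):

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/SizeOpts.h"

    // Prefer small code if the function is explicitly optsize, or if PGO data
    // (when present) says the whole function is cold in the call graph.
    static bool preferSmallCode(llvm::Function &F, llvm::ProfileSummaryInfo *PSI,
                                llvm::BlockFrequencyInfo *BFI) {
      return F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI);
    }
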
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp
index 5db4d2e4df9d..e2c387cb8983 100644
--- a/lib/Transforms/Utils/SplitModule.cpp
+++ b/lib/Transforms/Utils/SplitModule.cpp
@@ -1,9 +1,8 @@
//===- SplitModule.cpp - Split a module into partitions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp
index ac0b519f4a77..50844cf9d1c5 100644
--- a/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -1,9 +1,8 @@
//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 8956a089a99c..97a4533fabe5 100644
--- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -1,9 +1,8 @@
//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index fd0da79487f1..456724779b43 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,9 +1,8 @@
//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index d49b26472548..7f7bdf8a3d6d 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -1,9 +1,8 @@
//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 95416de07439..5272ab6e95d5 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -1,9 +1,8 @@
//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,3 +54,6 @@ void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAddDiscriminatorsPass());
+}
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 948d9bd5baad..a77bf50fe10b 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -14,13 +14,17 @@ namespace VNCoercion {
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
+ Type *StoredTy = StoredVal->getType();
+ if (StoredTy == LoadTy)
+ return true;
+
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
- StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
+ StoredTy->isArrayTy())
return false;
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
@@ -31,10 +35,16 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
return false;
// Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
- DL.isNonIntegralPointerType(LoadTy))
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ // As a special case, allow coercion of memset used to initialize
+ // an array w/null. Despite non-integral pointers not generally having a
+ // specific bit pattern, we do assume null is zero.
+ if (auto *CI = dyn_cast<Constant>(StoredVal))
+ return CI->isNullValue();
return false;
-
+ }
+
return true;
}
@@ -207,11 +217,22 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI, const DataLayout &DL) {
+ auto *StoredVal = DepSI->getValueOperand();
+
// Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getValueOperand()->getType()->isStructTy() ||
- DepSI->getValueOperand()->getType()->isArrayTy())
+ if (StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
return -1;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ // Allow casts of zero values to null as a special case
+ auto *CI = dyn_cast<Constant>(StoredVal);
+ if (!CI || !CI->isNullValue())
+ return -1;
+ }
+
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
@@ -228,6 +249,11 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ return -1;
+
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
@@ -264,9 +290,15 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
// If this is memset, we just need to see if the offset is valid in the size
// of the memset.
- if (MI->getIntrinsicID() == Intrinsic::memset)
+ if (MI->getIntrinsicID() == Intrinsic::memset) {
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+ if (!CI || !CI->isZero())
+ return -1;
+ }
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
+ }
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
@@ -278,7 +310,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
- if (!GV || !GV->isConstant())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
// See if the access is within the bounds of the transfer.
@@ -287,6 +319,12 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ // Don't coerce non-integral pointers to integers or vice versa, and the
+ // memtransfer is implicitly a raw byte code
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ // TODO: Can allow nullptrs from constant zeros
+ return -1;
+
unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
@@ -386,12 +424,12 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
- DestPTy =
- PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+ Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ Type *DestPTy =
+ PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlignment());
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 55fff3f3872a..fbc3407c301f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -1,9 +1,8 @@
//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -914,6 +913,21 @@ void Mapper::remapInstruction(Instruction *I) {
Tys.push_back(TypeMapper->remapType(Ty));
CS.mutateFunctionType(FunctionType::get(
TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+
+ LLVMContext &C = CS->getContext();
+ AttributeList Attrs = CS.getAttributes();
+ for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+ if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+ Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+ if (!Ty)
+ continue;
+
+ Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+ Attrs = Attrs.addAttribute(
+ C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+ }
+ }
+ CS.setAttributes(Attrs);
return;
}
if (auto *AI = dyn_cast<AllocaInst>(I))
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 9ff18328c219..4273080ddd91 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1,9 +1,8 @@
//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -927,7 +926,7 @@ bool Vectorizer::vectorizeStoreChain(
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole store.
- Type *StoreTy;
+ Type *StoreTy = nullptr;
for (Instruction *I : Chain) {
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
if (StoreTy->isIntOrIntVectorTy())
@@ -939,6 +938,7 @@ bool Vectorizer::vectorizeStoreChain(
break;
}
}
+ assert(StoreTy && "Failed to find store type");
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned AS = S0->getPointerAddressSpace();
@@ -1152,13 +1152,8 @@ bool Vectorizer::vectorizeLoadChain(
vectorizeLoadChain(Chains.second, InstructionsProcessed);
}
- unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
- StackAdjustedAlignment,
- DL, L0, nullptr, &DT);
- if (NewAlign != 0)
- Alignment = NewAlign;
-
- Alignment = NewAlign;
+ Alignment = getOrEnforceKnownAlignment(
+ L0->getPointerOperand(), StackAdjustedAlignment, DL, L0, nullptr, &DT);
}
if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
@@ -1182,7 +1177,7 @@ bool Vectorizer::vectorizeLoadChain(
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
- LoadInst *LI = Builder.CreateAlignedLoad(Bitcast, Alignment);
+ LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
propagateMetadata(LI, Chain);
if (VecLoadTy) {
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index b44fe5a52a2f..6ef8dc2d3cd7 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1,9 +1,8 @@
//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,8 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
+extern cl::opt<bool> EnableVPlanPredication;
+
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
@@ -46,6 +47,18 @@ static const unsigned MaxInterleaveFactor = 16;
namespace llvm {
+#ifndef NDEBUG
+static void debugVectorizationFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "LV: Not vectorizing: " << DebugMsg;
+ if (I != nullptr)
+ dbgs() << " " << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
StringRef RemarkName,
Loop *TheLoop,
@@ -103,6 +116,25 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
<< "LV: Interleaving disabled by the pass manager\n");
}
+void LoopVectorizeHints::setAlreadyVectorized() {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+
+ MDNode *IsVectorizedMD = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.isvectorized"),
+ ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
+ MDNode *LoopID = TheLoop->getLoopID();
+ MDNode *NewLoopID =
+ makePostTransformationMetadata(Context, LoopID,
+ {Twine(Prefix(), "vectorize.").str(),
+ Twine(Prefix(), "interleave.").str()},
+ {IsVectorizedMD});
+ TheLoop->setLoopID(NewLoopID);
+
+ // Update internal cache.
+ IsVectorized.Value = 1;
+}
+
bool LoopVectorizeHints::allowVectorization(
Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
@@ -230,57 +262,6 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
}
}
-MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
- unsigned V) const {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {
- MDString::get(Context, Name),
- ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
-}
-
-bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
- ArrayRef<Hint> HintTypes) {
- MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
- if (!Name)
- return false;
-
- for (auto H : HintTypes)
- if (Name->getString().endswith(H.Name))
- return true;
- return false;
-}
-
-void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
- if (HintTypes.empty())
- return;
-
- // Reserve the first element to LoopID (see below).
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, then ignore the existing operands.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- // If node in update list, ignore old value.
- if (!matchesHintMetadataName(Node, HintTypes))
- MDs.push_back(Node);
- }
- }
-
- // Now, add the missing hints.
- for (auto H : HintTypes)
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- TheLoop->setLoopID(NewLoopID);
-}
-
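For reference, the deleted helpers above built the replacement loop ID by hand; the new setAlreadyVectorized is expected to get an equivalent result for this caller from makePostTransformationMetadata (modulo any extra bookkeeping that helper performs). A rough sketch reconstructed from the removed code, under the hypothetical name buildIsVectorizedLoopID (not an LLVM API):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Drop the now-superseded vectorize/interleave hints, append the
// llvm.loop.isvectorized marker, and keep the loop ID self-referential.
static MDNode *buildIsVectorizedLoopID(LLVMContext &Ctx, MDNode *OldLoopID) {
  SmallVector<Metadata *, 4> MDs(1); // operand 0 is reserved for the self-reference
  if (OldLoopID)
    for (unsigned i = 1, e = OldLoopID->getNumOperands(); i < e; ++i) {
      MDNode *Op = cast<MDNode>(OldLoopID->getOperand(i));
      MDString *Name = Op->getNumOperands()
                           ? dyn_cast<MDString>(Op->getOperand(0)) : nullptr;
      if (Name && (Name->getString().startswith("llvm.loop.vectorize.") ||
                   Name->getString().startswith("llvm.loop.interleave.")))
        continue; // superseded by the isvectorized marker below
      MDs.push_back(Op);
    }
  MDs.push_back(MDNode::get(
      Ctx, {MDString::get(Ctx, "llvm.loop.isvectorized"),
            ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}));
  MDNode *NewLoopID = MDNode::get(Ctx, MDs);
  NewLoopID->replaceOperandWith(0, NewLoopID); // loop IDs refer to themselves
  return NewLoopID;
}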
bool LoopVectorizationRequirements::doesNotMeet(
Function *F, Loop *L, const LoopVectorizeHints &Hints) {
const char *PassName = Hints.vectorizeAnalysisPassName();
@@ -464,6 +445,14 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
+void LoopVectorizationLegality::reportVectorizationFailure(
+ const StringRef DebugMsg, const StringRef OREMsg,
+ const StringRef ORETag, Instruction *I) const {
+ LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
+ ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ ORETag, TheLoop, I) << OREMsg);
+}
+
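The new reportVectorizationFailure member above, together with the file-local debugVectorizationFailure helper, folds what used to be a separate LLVM_DEBUG trace and ORE->emit remark into a single call; that is the mechanical change most of the remaining hunks in this file apply. A simplified, self-contained analogue of the pattern (all names hypothetical, no LLVM dependencies):

#include <iostream>
#include <string>

// Stand-in for the optimization-remark emitter.
struct RemarkEmitter {
  void emit(const std::string &Tag, const std::string &Msg) {
    std::cout << "remark [" << Tag << "]: " << Msg << '\n';
  }
};

class LegalityChecker {
  RemarkEmitter &ORE;
  bool DebugEnabled;

public:
  LegalityChecker(RemarkEmitter &ORE, bool DebugEnabled)
      : ORE(ORE), DebugEnabled(DebugEnabled) {}

  // One call keeps the developer trace and the user-facing remark in sync.
  void reportFailure(const std::string &DebugMsg, const std::string &OREMsg,
                     const std::string &ORETag) {
    if (DebugEnabled)
      std::cerr << "LV: Not vectorizing: " << DebugMsg << ".\n";
    ORE.emit(ORETag, OREMsg);
  }

  bool canVectorize(bool HasSingleBackedge) {
    if (!HasSingleBackedge) {
      reportFailure("The loop must have a single backedge",
                    "loop control flow is not understood by vectorizer",
                    "CFGNotUnderstood");
      return false;
    }
    return true;
  }
};

int main() {
  RemarkEmitter ORE;
  LegalityChecker LC(ORE, /*DebugEnabled=*/true);
  return LC.canVectorize(/*HasSingleBackedge=*/false) ? 0 : 1;
}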
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
@@ -476,9 +465,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// not supported yet.
auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br) {
- LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Unsupported basic block terminator",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -488,13 +477,16 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// Check whether the BranchInst is a supported one. Only unconditional
// branches, conditional branches with an outer loop invariant condition or
// backedges are supported.
- if (Br && Br->isConditional() &&
+ // FIXME: We skip these checks when VPlan predication is enabled as we
+ // want to allow divergent branches. This whole check will be removed
+ // once VPlan predication is on by default.
+ if (!EnableVPlanPredication && Br && Br->isConditional() &&
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
- LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Unsupported conditional branch",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -506,11 +498,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// simple outer loops scenarios with uniform nested loops.
if (!isUniformLoopNest(TheLoop /*loop nest*/,
TheLoop /*context outer loop*/)) {
- LLVM_DEBUG(
- dbgs()
- << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Outer loop contains divergent loops",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -519,10 +509,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// Check whether we are able to set up outer loop induction.
if (!setupOuterLoopInductions()) {
- LLVM_DEBUG(
- dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
- ORE->emit(createMissedAnalysis("UnsupportedPhi")
- << "Unsupported outer loop Phi(s)");
+ reportVectorizationFailure("Unsupported outer loop Phi(s)",
+ "Unsupported outer loop Phi(s)",
+ "UnsupportedPhi");
if (DoExtraAnalysis)
Result = false;
else
@@ -627,9 +616,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that this PHI type is allowed.
if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "loop control flow is not understood by vectorizer");
- LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+ reportVectorizationFailure("Found a non-int non-pointer PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
return false;
}
@@ -647,9 +636,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "control flow not understood by vectorizer");
- LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ reportVectorizationFailure("Found an invalid PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", Phi);
return false;
}
@@ -698,10 +687,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
- << "value that could not be identified as "
- "reduction is used outside the loop");
- LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
+ reportVectorizationFailure("Found an unidentified PHI",
+ "value that could not be identified as "
+ "reduction is used outside the loop",
+ "NonReductionValueUsedOutsideLoop", Phi);
return false;
} // end of PHI handling
@@ -728,31 +717,33 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// but it's hard to provide meaningful yet generic advice.
// Also, should this be guarded by allowExtraAnalysis() and/or be part
// of the returned info from isFunctionVectorizable()?
- ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
- << "library call cannot be vectorized. "
- "Try compiling with -fno-math-errno, -ffast-math, "
- "or similar flags");
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags",
+ "CantVectorizeLibcall", CI);
} else {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "call instruction cannot be vectorized",
+ "CantVectorizeCall", CI);
}
- LLVM_DEBUG(
- dbgs() << "LV: Found a non-intrinsic callsite.\n");
return false;
}
- // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
- // second argument is the same (i.e. loop invariant)
- if (CI && hasVectorInstrinsicScalarOpd(
- getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+ // Some intrinsics have scalar arguments; such an argument must be the same
+ // across lanes (i.e. loop invariant) for the call to be vectorized.
+ if (CI) {
auto *SE = PSE.getSE();
- if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
- ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
- << "intrinsic instruction cannot be vectorized");
- LLVM_DEBUG(dbgs()
- << "LV: Found unvectorizable intrinsic " << *CI << "\n");
- return false;
- }
+ Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
+ reportVectorizationFailure("Found unvectorizable intrinsic",
+ "intrinsic instruction cannot be vectorized",
+ "CantVectorizeIntrinsic", CI);
+ return false;
+ }
+ }
}
// Check that the instruction return type is vectorizable.
@@ -760,9 +751,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
isa<ExtractElementInst>(I)) {
- ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
- << "instruction return type cannot be vectorized");
- LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
+ reportVectorizationFailure("Found unvectorizable type",
+ "instruction return type cannot be vectorized",
+ "CantVectorizeInstructionReturnType", &I);
return false;
}
@@ -770,11 +761,44 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (auto *ST = dyn_cast<StoreInst>(&I)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
- << "store instruction cannot be vectorized");
+ reportVectorizationFailure("Store instruction cannot be vectorized",
+ "store instruction cannot be vectorized",
+ "CantVectorizeStore", ST);
return false;
}
+ // For nontemporal stores, check that a nontemporal vector version is
+ // supported on the target.
+ if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
+ // Arbitrarily try a vector of 2 elements.
+ Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of stored type");
+ unsigned Alignment = getLoadStoreAlignment(ST);
+ if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal store instruction cannot be vectorized",
+ "nontemporal store instruction cannot be vectorized",
+ "CantVectorizeNontemporalStore", ST);
+ return false;
+ }
+ }
+
+ } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
+ // For nontemporal loads, check that a nontemporal vector version is
+ // supported on the target (arbitrarily try a vector of 2 elements).
+ Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of load type");
+ unsigned Alignment = getLoadStoreAlignment(LD);
+ if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal load instruction cannot be vectorized",
+ "nontemporal load instruction cannot be vectorized",
+ "CantVectorizeNontemporalLoad", LD);
+ return false;
+ }
+ }
+
// FP instructions can allow unsafe algebra, thus vectorizable by
// non-IEEE-754 compliant SIMD units.
// This applies to floating-point math operations and calls, not memory
@@ -797,23 +821,27 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
AllowedExit.insert(&I);
continue;
}
- ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
- << "value cannot be used outside the loop");
+ reportVectorizationFailure("Value cannot be used outside the loop",
+ "value cannot be used outside the loop",
+ "ValueUsedOutsideLoop", &I);
return false;
}
} // next instr.
}
if (!PrimaryInduction) {
- LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
- ORE->emit(createMissedAnalysis("NoInductionVariable")
- << "loop induction variable could not be identified");
+ reportVectorizationFailure("Did not find one integer induction var",
+ "loop induction variable could not be identified",
+ "NoInductionVariable");
return false;
} else if (!WidestIndTy) {
- ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
- << "integer loop induction variable could not be identified");
+ reportVectorizationFailure("Did not find one integer induction var",
+ "integer loop induction variable could not be identified",
+ "NoIntegerInductionVariable");
return false;
+ } else {
+ LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
}
}
@@ -839,11 +867,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
- ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not "
- "be vectorized");
- LLVM_DEBUG(
- dbgs() << "LV: Non vectorizable stores to a uniform address\n");
+ reportVectorizationFailure("Stores to a uniform address",
+ "write to a loop invariant address could not be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress");
return false;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -925,8 +951,9 @@ bool LoopVectorizationLegality::blockCanBePredicated(
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
- ORE->emit(createMissedAnalysis("IfConversionDisabled")
- << "if-conversion is disabled");
+ reportVectorizationFailure("If-conversion is disabled",
+ "if-conversion is disabled",
+ "IfConversionDisabled");
return false;
}
@@ -950,21 +977,26 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
for (BasicBlock *BB : TheLoop->blocks()) {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
- ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
- << "loop contains a switch statement");
+ reportVectorizationFailure("Loop contains a switch statement",
+ "loop contains a switch statement",
+ "LoopContainsSwitch", BB->getTerminator());
return false;
}
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointes)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ reportVectorizationFailure(
+ "Control flow cannot be substituted for a select",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ reportVectorizationFailure(
+ "Control flow cannot be substituted for a select",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
}
@@ -992,9 +1024,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!Lp->getLoopPreheader()) {
- LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Loop doesn't have a legal pre-header",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1003,8 +1035,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single backedge.
if (Lp->getNumBackEdges() != 1) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The loop must have a single backedge",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1013,8 +1046,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single exiting block.
if (!Lp->getExitingBlock()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The loop must have an exiting block",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1025,8 +1059,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// checked at the end of each iteration. With that we can assume that all
// instructions in the loop are executed the same number of times.
if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The exiting block is not the loop latch",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1087,7 +1122,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
if (!canVectorizeOuterLoop()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
+ reportVectorizationFailure("Unsupported outer loop",
+ "unsupported outer loop",
+ "UnsupportedOuterLoop");
// TODO: Implement DoExtraAnalysis when subsequent legal checks support
// outer loops.
return false;
@@ -1137,10 +1174,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
- ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
- << "Too many SCEV assumptions need to be made and checked "
- << "at runtime");
- LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+ reportVectorizationFailure("Too many SCEV checks needed",
+ "Too many SCEV assumptions need to be made and checked at runtime",
+ "TooManySCEVRunTimeChecks");
if (DoExtraAnalysis)
Result = false;
else
@@ -1159,20 +1195,20 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
if (!PrimaryInduction) {
- ORE->emit(createMissedAnalysis("NoPrimaryInduction")
- << "Missing a primary induction variable in the loop, which is "
- << "needed in order to fold tail by masking as required.");
- LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
- << "masking.\n");
+ reportVectorizationFailure(
+ "No primary induction, cannot fold tail by masking",
+ "Missing a primary induction variable in the loop, which is "
+ "needed in order to fold tail by masking as required.",
+ "NoPrimaryInduction");
return false;
}
// TODO: handle reductions when tail is folded by masking.
if (!Reductions.empty()) {
- ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
- << "Cannot fold tail by masking in the presence of reductions.");
- LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
- << "masking.\n");
+ reportVectorizationFailure(
+ "Loop has reductions, cannot fold tail by masking",
+ "Cannot fold tail by masking in the presence of reductions.",
+ "ReductionFoldingTailByMasking");
return false;
}
@@ -1183,10 +1219,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
continue;
- ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
- << "Cannot fold tail by masking in the presence of live outs.");
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
- << "outside user for : " << *UI << '\n');
+ reportVectorizationFailure(
+ "Cannot fold tail by masking, loop has an outside user for",
+ "Cannot fold tail by masking in the presence of live outs.",
+ "LiveOutFoldingTailByMasking", UI);
return false;
}
}
@@ -1198,9 +1234,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
if (!blockCanBePredicated(BB, SafePointers)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+ reportVectorizationFailure(
+ "Cannot fold tail by masking as required",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
}
diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 2aa219064299..97077cce83e3 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -1,9 +1,8 @@
//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -172,6 +171,13 @@ struct VectorizationFactor {
unsigned Width;
// Cost of the loop with that width
unsigned Cost;
+
+ // Width 1 means no vectorization, cost 0 means uncomputed cost.
+ static VectorizationFactor Disabled() { return {1, 0}; }
+
+ bool operator==(const VectorizationFactor &rhs) const {
+ return Width == rhs.Width && Cost == rhs.Cost;
+ }
};
/// Planner drives the vectorization process after having passed
@@ -192,11 +198,9 @@ class LoopVectorizationPlanner {
/// The legality analysis.
LoopVectorizationLegality *Legal;
- /// The profitablity analysis.
+ /// The profitability analysis.
LoopVectorizationCostModel &CM;
- using VPlanPtr = std::unique_ptr<VPlan>;
-
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -222,8 +226,9 @@ public:
LoopVectorizationCostModel &CM)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
- /// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor plan(bool OptForSize, unsigned UserVF);
+ /// Plan how to best vectorize, return the best VF and its cost, or None if
+ /// vectorization and interleaving should be avoided up front.
+ Optional<VectorizationFactor> plan(bool OptForSize, unsigned UserVF);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
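The planner's plan() entry point now distinguishes three outcomes: None (neither vectorize nor interleave), VectorizationFactor::Disabled() (width 1, no vector code), and a concrete factor with its cost. A small standalone sketch of how a caller can branch on that, using std::optional in place of llvm::Optional; decide() is a hypothetical stand-in for the real caller in LoopVectorize.cpp:

#include <cstdio>
#include <optional>

struct VectorizationFactor {
  unsigned Width; // 1 means no vectorization
  unsigned Cost;  // 0 means uncomputed cost
  static VectorizationFactor Disabled() { return {1, 0}; }
  bool operator==(const VectorizationFactor &RHS) const {
    return Width == RHS.Width && Cost == RHS.Cost;
  }
};

// Hypothetical caller illustrating the three possible results of plan().
static void decide(const std::optional<VectorizationFactor> &MaybeVF) {
  if (!MaybeVF) {
    std::puts("bail out up front: neither vectorize nor interleave");
    return;
  }
  if (*MaybeVF == VectorizationFactor::Disabled()) {
    std::puts("no vector code (VF=1); interleaving may still be considered");
    return;
  }
  std::printf("vectorize with VF=%u, estimated cost %u\n", MaybeVF->Width,
              MaybeVF->Cost);
}

int main() {
  decide(std::nullopt);                    // plan() returned None
  decide(VectorizationFactor::Disabled()); // VF 1: interleave-only path
  decide(VectorizationFactor{4, 12});      // illustrative numbers
}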
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index c45dee590b84..46265e3f3e13 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1,9 +1,8 @@
//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,8 +56,10 @@
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
+#include "VPlan.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanHCFGTransforms.h"
+#include "VPlanPredicator.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -86,7 +87,9 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -133,6 +136,7 @@
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
@@ -256,6 +260,13 @@ cl::opt<bool> EnableVPlanNativePath(
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
+// FIXME: Remove this switch once we have divergence analysis. Currently we
+// assume divergent non-backedge branches when this switch is true.
+cl::opt<bool> EnableVPlanPredication(
+ "enable-vplan-predication", cl::init(false), cl::Hidden,
+ cl::desc("Enable VPlan-native vectorization path predicator with "
+ "support for outer loop vectorization."));
+
// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -267,6 +278,13 @@ static cl::opt<bool> VPlanBuildStressTest(
"out right after the build (stress test the VPlan H-CFG construction "
"in the VPlan-native vectorization path)."));
+cl::opt<bool> llvm::EnableLoopInterleaving(
+ "interleave-loops", cl::init(true), cl::Hidden,
+ cl::desc("Enable loop interleaving in Loop vectorization passes"));
+cl::opt<bool> llvm::EnableLoopVectorization(
+ "vectorize-loops", cl::init(true), cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
+
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
@@ -311,11 +329,14 @@ static unsigned getReciprocalPredBlockProb() { return 2; }
/// A helper function that adds a 'fast' flag to floating-point operations.
static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)) {
- FastMathFlags Flags;
- Flags.setFast();
- cast<Instruction>(V)->setFastMathFlags(Flags);
- }
+ if (isa<FPMathOperator>(V))
+ cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
+ return V;
+}
+
+static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
+ if (isa<FPMathOperator>(V))
+ cast<Instruction>(V)->setFastMathFlags(FMF);
return V;
}
@@ -760,7 +781,7 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
const DILocation *DIL = Inst->getDebugLoc();
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
!isa<DbgInfoIntrinsic>(Inst)) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(UF * VF);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF);
if (NewDIL)
B.SetCurrentDebugLocation(NewDIL.getValue());
else
@@ -836,7 +857,7 @@ public:
AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {}
/// \return An upper bound for the vectorization factor, or None if
- /// vectorization should be avoided up front.
+ /// vectorization and interleaving should be avoided up front.
Optional<unsigned> computeMaxVF(bool OptForSize);
/// \return The most profitable vectorization factor and the cost of that VF.
@@ -1149,6 +1170,18 @@ public:
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
+ /// Estimate cost of an intrinsic call instruction CI if it were vectorized
+ /// with factor VF. Return the cost of the instruction, including
+ /// scalarization overhead if it's needed.
+ unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+
+ /// Estimate cost of a call instruction CI if it were vectorized with factor
+ /// VF. Return the cost of the instruction, including scalarization overhead
+ /// if it's needed. The flag NeedToScalarize shows if the call needs to be
+ /// scalarized -
+ /// i.e. either vector version isn't available, or is too expensive.
+ unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+
private:
unsigned NumPredStores = 0;
@@ -1201,6 +1234,10 @@ private:
/// element)
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
+ /// Estimate the overhead of scalarizing an instruction. This is a
+ /// convenience wrapper for the type-based getScalarizationOverhead API.
+ unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -1295,6 +1332,30 @@ private:
DecisionList WideningDecisions;
+ /// Returns true if \p V is expected to be vectorized and it needs to be
+ /// extracted.
+ bool needsExtract(Value *V, unsigned VF) const {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+ return false;
+
+ // Assume we can vectorize V (and hence we need extraction) if the
+ // scalars are not computed yet. This can happen, because it is called
+ // via getScalarizationOverhead from setCostBasedWideningDecision, before
+ // the scalars are collected. That should be a safe assumption in most
+ // cases, because we check if the operands have vectorizable types
+ // beforehand in LoopVectorizationLegality.
+ return Scalars.find(VF) == Scalars.end() ||
+ !isScalarAfterVectorization(I, VF);
+ };
+
+ /// Returns a range containing only operands needing to be extracted.
+ SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
+ unsigned VF) {
+ return SmallVector<Value *, 4>(make_filter_range(
+ Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+ }
+
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1372,12 +1433,6 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
return false;
}
- if (!Hints.getWidth()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- Hints.emitRemarkWithHints();
- return false;
- }
-
if (Hints.getInterleave() > 1) {
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
@@ -1447,12 +1502,13 @@ struct LoopVectorize : public FunctionPass {
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
- GetLAA, *ORE);
+ GetLAA, *ORE, PSI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1478,6 +1534,7 @@ struct LoopVectorize : public FunctionPass {
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
};
@@ -2051,7 +2108,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
// A[i] = b; // Member of index 0
// A[i+2] = c; // Member of index 2 (Current instruction)
// The current pointer points to A[i+2]; adjust it to A[i].
- NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
+ NewPtr = Builder.CreateGEP(ScalarTy, NewPtr, Builder.getInt32(-Index));
if (InBounds)
cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
@@ -2093,8 +2150,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
GroupMask, UndefVec, "wide.masked.vec");
}
else
- NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
- Group->getAlignment(), "wide.vec");
+ NewLoad = Builder.CreateAlignedLoad(VecTy, NewPtrs[Part],
+ Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
}
@@ -2239,16 +2296,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)));
+ Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(-Part * VF)));
PartPtr->setIsInBounds(InBounds);
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)));
+ Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
PartPtr->setIsInBounds(InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
} else {
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)));
+ Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
PartPtr->setIsInBounds(InBounds);
}
@@ -2305,7 +2362,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
UndefValue::get(DataTy),
"wide.masked.load");
else
- NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+ NewLI =
+ Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
// Add metadata to the load, but setVectorValue to the reverse shuffle.
addMetadata(NewLI, LI);
@@ -2665,7 +2723,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
return B.CreateGEP(
- nullptr, StartValue,
+ StartValue->getType()->getPointerElementType(), StartValue,
CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
&*B.GetInsertPoint())));
}
@@ -2849,26 +2907,42 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
- unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
-
// The old induction's phi node in the scalar body needs the truncated
// value.
for (BasicBlock *BB : LoopBypassBlocks)
BCResumeVal->addIncoming(II.getStartValue(), BB);
- OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
+ OrigPhi->setIncomingValueForBlock(ScalarPH, BCResumeVal);
}
+ // We need the OrigLoop (scalar loop part) latch terminator to help
+ // produce correct debug info for the middle block BB instructions.
+ // The legality check stage guarantees that the loop will have a single
+ // latch.
+ assert(isa<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()) &&
+ "Scalar loop latch terminator isn't a branch");
+ BranchInst *ScalarLatchBr =
+ cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
// If tail is to be folded, we know we don't need to run the remainder.
Value *CmpN = Builder.getTrue();
- if (!Cost->foldTailByMasking())
+ if (!Cost->foldTailByMasking()) {
CmpN =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
- ReplaceInstWithInst(MiddleBlock->getTerminator(),
- BranchInst::Create(ExitBlock, ScalarPH, CmpN));
+
+ // Here we use the same DebugLoc as the scalar loop latch branch instead
+ // of the corresponding compare because they may have ended up with
+ // different line numbers and we want to avoid awkward line stepping while
+ // debugging. Eg. if the compare has got a line number inside the loop.
+ cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
+ }
+
+ BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
+ BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
+ ReplaceInstWithInst(MiddleBlock->getTerminator(), BrInst);
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
@@ -3022,45 +3096,9 @@ static void cse(BasicBlock *BB) {
}
}
-/// Estimate the overhead of scalarizing an instruction. This is a
-/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
- const TargetTransformInfo &TTI) {
- if (VF == 1)
- return 0;
-
- unsigned Cost = 0;
- Type *RetTy = ToVectorTy(I->getType(), VF);
- if (!RetTy->isVoidTy() &&
- (!isa<LoadInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()))
- Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-
- // Some targets keep addresses scalar.
- if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
- return Cost;
-
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- SmallVector<const Value *, 4> Operands(CI->arg_operands());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
- else if (!isa<StoreInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()) {
- SmallVector<const Value *, 4> Operands(I->operand_values());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
-
- return Cost;
-}
-
-// Estimate cost of a call instruction CI if it were vectorized with factor VF.
-// Return the cost of the instruction, including scalarization overhead if it's
-// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
-// i.e. either vector version isn't available, or is too expensive.
-static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI,
- bool &NeedToScalarize) {
+unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+ unsigned VF,
+ bool &NeedToScalarize) {
Function *F = CI->getCalledFunction();
StringRef FnName = CI->getCalledFunction()->getName();
Type *ScalarRetTy = CI->getType();
@@ -3083,7 +3121,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
+ unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
@@ -3102,12 +3140,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
return Cost;
}
-// Estimate cost of an intrinsic call instruction CI if it were vectorized with
-// factor VF. Return the cost of the instruction, including scalarization
-// overhead if it's needed.
-static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI) {
+unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+ unsigned VF) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
@@ -3468,7 +3502,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
Start->addIncoming(Incoming, BB);
}
- Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
+ Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
Phi->setName("scalar.recur");
// Finally, fix users of the recurrence outside the loop. The users will need
@@ -3596,14 +3630,23 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
- setDebugLocFromInst(Builder, ReducedPartRdx);
+
+ // The middle block terminator has already been assigned a DebugLoc here (the
+ // OrigLoop's single latch terminator). We want the whole middle block to
+ // appear to execute on this line because: (a) it is all compiler generated,
+ // (b) these instructions are always executed after evaluating the latch
+ // conditional branch, and (c) other passes may add new predecessors which
+ // terminate on this line. This is the easiest way to ensure we don't
+ // accidentally cause an extra step back into the loop while debugging.
+ setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
for (unsigned Part = 1; Part < UF; ++Part) {
Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
- ReducedPartRdx, "bin.rdx"));
+ ReducedPartRdx, "bin.rdx"),
+ RdxDesc.getFastMathFlags());
else
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
RdxPart);
@@ -3935,9 +3978,11 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
- auto *NewGEP = GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(Ptr, Indices)
- : Builder.CreateGEP(Ptr, Indices);
+ auto *NewGEP =
+ GEP->isInBounds()
+ ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
+ Indices)
+ : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
@@ -3955,6 +4000,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
+ case Instruction::FNeg:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
@@ -3965,21 +4011,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- // Just widen binops.
- auto *BinOp = cast<BinaryOperator>(&I);
- setDebugLocFromInst(Builder, BinOp);
+ // Just widen unops and binops.
+ setDebugLocFromInst(Builder, &I);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
- Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ SmallVector<Value *, 2> Ops;
+ for (Value *Op : I.operands())
+ Ops.push_back(getOrCreateVectorValue(Op, Part));
- if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
- VecOp->copyIRFlags(BinOp);
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V))
+ VecOp->copyIRFlags(&I);
// Use this vector value for all users of the original instruction.
VectorLoopValueMap.setVectorValue(&I, Part, V);
- addMetadata(V, BinOp);
+ addMetadata(V, &I);
}
break;
@@ -4088,9 +4135,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
assert((UseVectorIntrinsic || !NeedToScalarize) &&
"Instruction should be scalarized elsewhere.");
@@ -4395,6 +4442,13 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
auto *Group = getInterleavedAccessGroup(I);
assert(Group && "Must have a group.");
+ // If the instruction's allocated size doesn't equal its type size, it
+ // requires padding and will be scalarized.
+ auto &DL = I->getModule()->getDataLayout();
+ auto *ScalarTy = getMemInstValueType(I);
+ if (hasIrregularType(ScalarTy, DL, VF))
+ return false;
+
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
// needs predication, or it was decided to use masking to deal with gaps.
@@ -4987,6 +5041,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
if (LoopCost == 0)
LoopCost = expectedCost(VF).first;
+ assert(LoopCost && "Non-zero loop cost expected");
+
// Clamp the calculated IC to be between the 1 and the max interleave count
// that the target allows.
if (IC > MaxInterleaveCount)
@@ -5314,15 +5370,6 @@ int LoopVectorizationCostModel::computePredInstDiscount(
return true;
};
- // Returns true if an operand that cannot be scalarized must be extracted
- // from a vector. We will account for this scalarization overhead below. Note
- // that the non-void predicated instructions are placed in their own blocks,
- // and their return values are inserted into vectors. Thus, an extract would
- // still be required.
- auto needsExtract = [&](Instruction *I) -> bool {
- return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
- };
-
// Compute the expected cost discount from scalarizing the entire expression
// feeding the predicated instruction. We currently only consider expressions
// that are single-use instruction chains.
@@ -5362,7 +5409,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
"Instruction has non-scalar type");
if (canBeScalarized(J))
Worklist.push_back(J);
- else if (needsExtract(J))
+ else if (needsExtract(J, VF))
ScalarCost += TTI.getScalarizationOverhead(
ToVectorTy(J->getType(),VF), false, true);
}
@@ -5484,7 +5531,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// If we have a predicated store, it may not be executed for each vector
// lane. Scale the cost by the probability of executing the predicated
@@ -5636,6 +5683,36 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return VectorizationCostTy(C, TypeNotScalarized);
}
+unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+ unsigned VF) {
+
+ if (VF == 1)
+ return 0;
+
+ unsigned Cost = 0;
+ Type *RetTy = ToVectorTy(I->getType(), VF);
+ if (!RetTy->isVoidTy() &&
+ (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
+ Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+
+ // Some targets keep addresses scalar.
+ if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+ return Cost;
+
+ // Some targets support efficient element stores.
+ if (isa<StoreInst>(I) && TTI.supportsEfficientVectorElementLoadStore())
+ return Cost;
+
+ // Collect operands to consider.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+
+ // Skip operands that do not require extraction/scalarization and do not incur
+ // any overhead.
+ return Cost + TTI.getOperandsScalarizationOverhead(
+ filterExtractingOperands(Ops, VF), VF);
+}
+
void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (VF == 1)
return;
@@ -5876,7 +5953,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// The cost of insertelement and extractelement instructions needed for
// scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
@@ -5916,6 +5993,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
}
+ case Instruction::FNeg: {
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+ I->getOperand(0));
+ }
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -5997,16 +6082,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::Call: {
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
if (getVectorIntrinsicIDForCall(CI, TLI))
- return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
return CallCost;
}
default:
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
- getScalarizationOverhead(I, VF, TTI);
+ getScalarizationOverhead(I, VF);
} // end of switch.
}
@@ -6027,10 +6112,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
+Pass *createLoopVectorizePass() { return new LoopVectorize(); }
+
Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
bool VectorizeOnlyWhenForced) {
return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
@@ -6066,50 +6154,65 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
}
}
+// TODO: we could return a pair of values that specify the max VF and
+// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
+// `buildVPlans(VF, VF)`. We cannot do it because VPlan at the moment
+// doesn't have a cost model that can choose which plan to execute if
+// more than one is generated.
+static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
+ LoopVectorizationCostModel &CM) {
+ unsigned WidestType;
+ std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
+ return WidestVectorRegBits / WidestType;
+}
+
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
unsigned UserVF) {
- // Width 1 means no vectorization, cost 0 means uncomputed cost.
- const VectorizationFactor NoVectorization = {1U, 0U};
-
+ unsigned VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
if (!OrigLoop->empty()) {
- // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
- // This won't be necessary when UserVF is not required in the VPlan-native
- // path.
- if (VPlanBuildStressTest && !UserVF)
- UserVF = 4;
-
+ // If the user doesn't provide a vectorization factor, determine a
+ // reasonable one.
+ if (!UserVF) {
+ VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+ LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
+
+ // Make sure we have a VF > 1 for stress testing.
+ if (VPlanBuildStressTest && VF < 2) {
+ LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
+ << "overriding computed VF.\n");
+ VF = 4;
+ }
+ }
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- assert(UserVF && "Expected UserVF for outer loop vectorization.");
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- buildVPlans(UserVF, UserVF);
+ assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
+ LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user " : "") << "VF " << VF
+ << " to build VPlans.\n");
+ buildVPlans(VF, VF);
// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
- return NoVectorization;
+ return VectorizationFactor::Disabled();
- return {UserVF, 0};
+ return {VF, 0};
}
LLVM_DEBUG(
dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the "
"VPlan-native path.\n");
- return NoVectorization;
+ return VectorizationFactor::Disabled();
}
-VectorizationFactor
-LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
+Optional<VectorizationFactor> LoopVectorizationPlanner::plan(bool OptForSize,
+ unsigned UserVF) {
assert(OrigLoop->empty() && "Inner loop expected.");
- // Width 1 means no vectorization, cost 0 means uncomputed cost.
- const VectorizationFactor NoVectorization = {1U, 0U};
Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
- if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
- return NoVectorization;
+ if (!MaybeMaxVF) // Cases that should not be vectorized or interleaved.
+ return None;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
@@ -6129,7 +6232,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
CM.selectUserVectorizationFactor(UserVF);
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
- return {UserVF, 0};
+ return {{UserVF, 0}};
}
unsigned MaxVF = MaybeMaxVF.getValue();
@@ -6148,7 +6251,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
buildVPlansWithVPRecipes(1, MaxVF);
LLVM_DEBUG(printPlans(dbgs()));
if (MaxVF == 1)
- return NoVectorization;
+ return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
return CM.selectVectorizationFactor(MaxVF);
@@ -6527,6 +6630,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
case Instruction::FCmp:
case Instruction::FDiv:
case Instruction::FMul:
+ case Instruction::FNeg:
case Instruction::FPExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
@@ -6582,9 +6686,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
return UseVectorIntrinsic || !NeedToScalarize;
}
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6756,8 +6860,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
}
}
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlanWithVPRecipes(
+VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
// Hold a mapping from predicated instructions to their recipes, in order to
@@ -6772,7 +6875,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = llvm::make_unique<VPlan>(VPBB);
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
@@ -6881,8 +6984,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
return Plan;
}
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
@@ -6897,13 +6999,22 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
+ for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+ Plan->addVF(VF);
+
+ if (EnableVPlanPredication) {
+ VPlanPredicator VPP(*Plan);
+ VPP.predicate();
+
+ // Avoid running transformation to recipes until masked code generation in
+ // VPlan-native path is in place.
+ return Plan;
+ }
+
SmallPtrSet<Instruction *, 1> DeadInstructions;
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
Plan, Legal->getInductionVars(), DeadInstructions);
- for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
- Plan->addVF(VF);
-
return Plan;
}
@@ -7096,7 +7207,8 @@ static bool processLoopInVPlanNativePath(
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+ OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
@@ -7109,24 +7221,28 @@ static bool processLoopInVPlanNativePath(
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
// Get user vectorization factor.
- unsigned UserVF = Hints.getWidth();
+ const unsigned UserVF = Hints.getWidth();
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
- // code.
- if (VPlanBuildStressTest)
+ // code. Masked vector code generation support will follow soon.
+ // Also, do not attempt to vectorize if no vector code will be produced.
+ if (VPlanBuildStressTest || EnableVPlanPredication ||
+ VectorizationFactor::Disabled() == VF)
return false;
LVP.setBestPlan(VF.Width, 1);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
&CM);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
@@ -7184,7 +7300,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements(*ORE);
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE,
+ LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
&Requirements, &Hints, DB, AC);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
@@ -7192,10 +7308,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7204,7 +7322,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// pipeline.
if (!L->empty())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
- ORE, Hints);
+ ORE, BFI, PSI, Hints);
assert(L->empty() && "Inner loop expected.");
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7304,14 +7422,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned UserVF = Hints.getWidth();
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.plan(OptForSize, UserVF);
+ Optional<VectorizationFactor> MaybeVF = LVP.plan(OptForSize, UserVF);
- // Select the interleave count.
- unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
-
- // Get user interleave count.
+ VectorizationFactor VF = VectorizationFactor::Disabled();
+ unsigned IC = 1;
unsigned UserIC = Hints.getInterleave();
+ if (MaybeVF) {
+ VF = *MaybeVF;
+ // Select the interleave count.
+ IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
+ }
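
The Optional return of plan() gives three distinct outcomes here: no value means vectorization and interleaving should both be avoided, VectorizationFactor::Disabled() (width 1, cost 0) means the loop is not vectorized but may still be interleaved, and anything else carries the chosen width and cost. A minimal standalone sketch of that contract, using std::optional in place of llvm::Optional; the stand-in types below are illustrative and not the LLVM ones:

#include <cstdio>
#include <optional>

// Illustrative stand-in; not the LLVM type.
struct VectorizationFactor {
  unsigned Width = 1, Cost = 0;
  static VectorizationFactor Disabled() { return {1, 0}; }
  bool operator==(const VectorizationFactor &O) const {
    return Width == O.Width && Cost == O.Cost;
  }
};

static void describe(std::optional<VectorizationFactor> MaybeVF) {
  if (!MaybeVF)
    std::printf("avoid vectorization and interleaving up front\n");
  else if (*MaybeVF == VectorizationFactor::Disabled())
    std::printf("do not vectorize, but interleaving may still be selected\n");
  else
    std::printf("vectorize with VF=%u (cost %u)\n", MaybeVF->Width,
                MaybeVF->Cost);
}

int main() {
  describe(std::nullopt);                     // plan() returned None
  describe(VectorizationFactor::Disabled());  // MaxVF == 1
  describe(VectorizationFactor{4, 12});       // a profitable factor
}
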
+
// Identify the diagnostic messages that should be produced.
std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
bool VectorizeLoop = true, InterleaveLoop = true;
@@ -7330,7 +7452,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (IC == 1 && UserIC <= 1) {
+ if (!MaybeVF && UserIC > 1) {
+ // Tell the user interleaving was avoided up-front, despite being explicitly
+ // requested.
+ LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
+ "interleaving should be avoided up front\n");
+ IntDiagMsg = std::make_pair(
+ "InterleavingAvoided",
+ "Ignoring UserIC, because interleaving was avoided up front");
+ InterleaveLoop = false;
+ } else if (IC == 1 && UserIC <= 1) {
// Tell the user interleaving is not beneficial.
LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
IntDiagMsg = std::make_pair(
@@ -7457,7 +7588,7 @@ bool LoopVectorizePass::runImpl(
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE_) {
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
SE = &SE_;
LI = &LI_;
TTI = &TTI_;
@@ -7469,6 +7600,7 @@ bool LoopVectorizePass::runImpl(
GetLAA = &GetLAA_;
DB = &DB_;
ORE = &ORE_;
+ PSI = PSI_;
// Don't attempt if
// 1. the target claims to have no vector registers, and
@@ -7488,7 +7620,8 @@ bool LoopVectorizePass::runImpl(
// will simplify all loops, regardless of whether anything end up being
// vectorized.
for (auto &L : *LI)
- Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
+ Changed |=
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
@@ -7527,15 +7660,22 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
bool Changed =
- runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+ runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e856a7e6802..27a86c0bca91 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1,9 +1,8 @@
//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,6 +105,10 @@ using namespace slpvectorizer;
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
+cl::opt<bool>
+ llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
+ cl::desc("Run the SLP vectorization passes"));
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -207,6 +210,13 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
+/// \returns True if \p I is commutative; handles CmpInst as well as other Instructions.
+static bool isCommutative(Instruction *I) {
+ if (auto *IC = dyn_cast<CmpInst>(I))
+ return IC->isCommutative();
+ return I->isCommutative();
+}
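
The helper above dispatches to CmpInst::isCommutative() first because a compare's commutativity depends on its predicate rather than its opcode. A rough standalone model of that rule (equality-style predicates commute, ordered ones need a predicate swap instead); the enum and function names are made up for illustration:

#include <cstdio>

// Toy predicate model: only equality-style compares commute; ordered
// compares (like 'slt') need a predicate swap instead.
enum class Pred { EQ, NE, SLT, SGT };

static bool cmpIsCommutative(Pred P) { return P == Pred::EQ || P == Pred::NE; }

int main() {
  std::printf("eq commutative:  %d\n", cmpIsCommutative(Pred::EQ));  // 1
  std::printf("slt commutative: %d\n", cmpIsCommutative(Pred::SLT)); // 0
}
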
+
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
@@ -438,8 +448,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- if (hasVectorInstrinsicScalarOpd(ID, 1)) {
- return (CI->getArgOperand(1) == Scalar);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ if (hasVectorInstrinsicScalarOpd(ID, i))
+ return (CI->getArgOperand(i) == Scalar);
}
LLVM_FALLTHROUGH;
}
@@ -474,6 +485,8 @@ namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
+ struct TreeEntry;
+
public:
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
@@ -517,7 +530,7 @@ public:
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
- int getSpillCost();
+ int getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
@@ -576,7 +589,7 @@ public:
/// the stored value. Otherwise, the size is the width of the largest loaded
/// value reaching V. This method is used by the vectorizer to calculate
/// vectorization factors.
- unsigned getVectorElementSize(Value *V);
+ unsigned getVectorElementSize(Value *V) const;
/// Compute the minimum type sizes required to represent the entries in a
/// vectorizable tree.
@@ -599,13 +612,512 @@ public:
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
- bool isTreeTinyAndNotFullyVectorizable();
+ bool isTreeTinyAndNotFullyVectorizable() const;
OptimizationRemarkEmitter *getORE() { return ORE; }
-private:
- struct TreeEntry;
+ /// This structure holds any data we need about the edges being traversed
+ /// during buildTree_rec(). We keep track of:
+ /// (i) the user TreeEntry, and
+ /// (ii) the index of the edge.
+ struct EdgeInfo {
+ EdgeInfo() = default;
+ EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
+ : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
+ /// The user TreeEntry.
+ TreeEntry *UserTE = nullptr;
+ /// The operand index of the use.
+ unsigned EdgeIdx = UINT_MAX;
+#ifndef NDEBUG
+ friend inline raw_ostream &operator<<(raw_ostream &OS,
+ const BoUpSLP::EdgeInfo &EI) {
+ EI.dump(OS);
+ return OS;
+ }
+ /// Debug print.
+ void dump(raw_ostream &OS) const {
+ OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
+ << " EdgeIdx:" << EdgeIdx << "}";
+ }
+ LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
+#endif
+ };
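
A small standalone sketch of how such (user node, operand index) pairs are threaded through a recursive build, mirroring the buildTree_rec(Operands, Depth + 1, {TE, i}) calls later in this patch; the Node/Edge types are invented for the example:

#include <cstdio>
#include <vector>

struct Node;

// Hypothetical analogue of EdgeInfo: which node uses this one, and through
// which operand index.
struct Edge {
  Node *UserTE = nullptr;
  unsigned EdgeIdx = ~0u;
};

struct Node {
  int Id;
  std::vector<Edge> UserEdges;
};

// Record that 'Child' feeds operand 'OpIdx' of 'Parent'.
static void link(Node &Parent, Node &Child, unsigned OpIdx) {
  Child.UserEdges.push_back({&Parent, OpIdx});
}

int main() {
  Node Root{0, {}}, Left{1, {}}, Right{2, {}};
  link(Root, Left, 0);  // mirrors buildTree_rec(Left,  Depth + 1, {TE, 0})
  link(Root, Right, 1); // mirrors buildTree_rec(Right, Depth + 1, {TE, 1})
  for (const Edge &E : Right.UserEdges)
    std::printf("node %d is operand %u of node %d\n", Right.Id, E.EdgeIdx,
                E.UserTE->Id);
}
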
+
+ /// A helper data structure to hold the operands of a vector of instructions.
+ /// This supports a fixed vector length for all operand vectors.
+ class VLOperands {
+ /// For each operand we need (i) the value, and (ii) the opcode that it
+ /// would be attached to if the expression was in a left-linearized form.
+ /// This is required to avoid illegal operand reordering.
+ /// For example:
+ /// \verbatim
+ /// 0 Op1
+ /// |/
+ /// Op1 Op2 Linearized + Op2
+ /// \ / ----------> |/
+ /// - -
+ ///
+ /// Op1 - Op2 (0 + Op1) - Op2
+ /// \endverbatim
+ ///
+ /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+ ///
+ /// Another way to think of this is to track all the operations across the
+ /// path from the operand all the way to the root of the tree and to
+ /// calculate the operation that corresponds to this path. For example, the
+ /// path from Op2 to the root crosses the RHS of the '-', therefore the
+ /// corresponding operation is a '-' (which matches the one in the
+ /// linearized tree, as shown above).
+ ///
+ /// For lack of a better term, we refer to this operation as Accumulated
+ /// Path Operation (APO).
+ struct OperandData {
+ OperandData() = default;
+ OperandData(Value *V, bool APO, bool IsUsed)
+ : V(V), APO(APO), IsUsed(IsUsed) {}
+ /// The operand value.
+ Value *V = nullptr;
+ /// TreeEntries only allow a single opcode, or an alternate sequence of
+ /// them (e.g., +, -). Therefore, we can safely use a boolean value for the
+ /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+ /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+ /// (e.g., Add/Mul).
+ bool APO = false;
+ /// Helper data for the reordering function.
+ bool IsUsed = false;
+ };
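
As a concrete instance of the APO rule described above, consider a two-lane bundle 'B0 + C0' / 'B1 - C1'. The standalone snippet below reproduces the flag computation used by appendOperandsOfVL() further down: operand 0 is never inverted, operand 1 is inverted only in the non-commutative (sub) lane.

#include <array>
#include <cstdio>

int main() {
  // Bundle of two lanes: lane 0 is 'B0 + C0', lane 1 is 'B1 - C1'.
  // Operand 0 (the LHS) is never inverted; operand 1 is inverted exactly
  // when the lane's operation is non-commutative (the subtraction).
  const std::array<bool, 2> LaneIsCommutative = {true /*add*/, false /*sub*/};
  for (unsigned Lane = 0; Lane != 2; ++Lane)
    for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx) {
      bool APO = (OpIdx == 0) ? false : !LaneIsCommutative[Lane];
      std::printf("Lane %u, Op %u: APO=%s\n", Lane, OpIdx,
                  APO ? "true" : "false");
    }
}
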
+
+ /// During operand reordering, we are trying to select the operand at lane
+ /// that matches best with the operand at the neighboring lane. Our
+ /// selection is based on the type of value we are looking for. For example,
+ /// if the neighboring lane has a load, we need to look for a load that is
+ /// accessing a consecutive address. These strategies are summarized in the
+ /// 'ReorderingMode' enumerator.
+ enum class ReorderingMode {
+ Load, ///< Matching loads to consecutive memory addresses
+ Opcode, ///< Matching instructions based on opcode (same or alternate)
+ Constant, ///< Matching constants
+ Splat, ///< Matching the same instruction multiple times (broadcast)
+ Failed, ///< We failed to create a vectorizable group
+ };
+
+ using OperandDataVec = SmallVector<OperandData, 2>;
+
+ /// A vector of operand vectors.
+ SmallVector<OperandDataVec, 4> OpsVec;
+
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+
+ /// \returns the operand data at \p OpIdx and \p Lane.
+ OperandData &getData(unsigned OpIdx, unsigned Lane) {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+ const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// Clears the used flag for all entries.
+ void clearUsed() {
+ for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane)
+ OpsVec[OpIdx][Lane].IsUsed = false;
+ }
+
+ /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+ void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+ std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+ }
+
+ // Search all operands in Ops[*][Lane] for the one that best matches
+ // Ops[OpIdx][LastLane] and return its operand index.
+ // If no good match can be found, return None.
+ Optional<unsigned>
+ getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+ ArrayRef<ReorderingMode> ReorderingModes) {
+ unsigned NumOperands = getNumOperands();
+
+ // The operand of the previous lane at OpIdx.
+ Value *OpLastLane = getData(OpIdx, LastLane).V;
+
+ // Our strategy mode for OpIdx.
+ ReorderingMode RMode = ReorderingModes[OpIdx];
+
+ // The linearized opcode of the operand at OpIdx, Lane.
+ bool OpIdxAPO = getData(OpIdx, Lane).APO;
+
+ const unsigned BestScore = 2;
+ const unsigned GoodScore = 1;
+
+ // The best operand index and its score.
+ // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
+ // are using the score to differentiate between the two.
+ struct BestOpData {
+ Optional<unsigned> Idx = None;
+ unsigned Score = 0;
+ } BestOp;
+
+ // Iterate through all unused operands and look for the best.
+ for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
+ // Get the operand at Idx and Lane.
+ OperandData &OpData = getData(Idx, Lane);
+ Value *Op = OpData.V;
+ bool OpAPO = OpData.APO;
+
+ // Skip already selected operands.
+ if (OpData.IsUsed)
+ continue;
+
+ // Skip if we are trying to move the operand to a position with a
+ // different opcode in the linearized tree form. This would break the
+ // semantics.
+ if (OpAPO != OpIdxAPO)
+ continue;
+
+ // Look for an operand that matches the current mode.
+ switch (RMode) {
+ case ReorderingMode::Load:
+ if (isa<LoadInst>(Op)) {
+ // Figure out which is left and right, so that we can check for
+ // consecutive loads
+ bool LeftToRight = Lane > LastLane;
+ Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+ Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+ if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
+ cast<LoadInst>(OpRight), DL, SE))
+ BestOp.Idx = Idx;
+ }
+ break;
+ case ReorderingMode::Opcode:
+ // We accept both Instructions and Undefs, but with different scores.
+ if ((isa<Instruction>(Op) && isa<Instruction>(OpLastLane) &&
+ cast<Instruction>(Op)->getOpcode() ==
+ cast<Instruction>(OpLastLane)->getOpcode()) ||
+ (isa<UndefValue>(OpLastLane) && isa<Instruction>(Op)) ||
+ isa<UndefValue>(Op)) {
+ // An instruction has a higher score than an undef.
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Constant:
+ if (isa<Constant>(Op)) {
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Splat:
+ if (Op == OpLastLane)
+ BestOp.Idx = Idx;
+ break;
+ case ReorderingMode::Failed:
+ return None;
+ }
+ }
+
+ if (BestOp.Idx) {
+ getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
+ return BestOp.Idx;
+ }
+ // If we could not find a good match return None.
+ return None;
+ }
+
+ /// Helper for reorder(). \returns the lane that we should start
+ /// reordering from. This is the one which has the least number of operands
+ /// that can freely move about.
+ unsigned getBestLaneToStartReordering() const {
+ unsigned BestLane = 0;
+ unsigned Min = UINT_MAX;
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane) {
+ unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
+ if (NumFreeOps < Min) {
+ Min = NumFreeOps;
+ BestLane = Lane;
+ }
+ }
+ return BestLane;
+ }
+
+ /// \returns the maximum number of operands that are allowed to be reordered
+ /// for \p Lane. This is used as a heuristic for selecting the first lane to
+ /// start operand reordering.
+ unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
+ unsigned CntTrue = 0;
+ unsigned NumOperands = getNumOperands();
+ // Operands with the same APO can be reordered. We therefore need to count
+ // how many of them we have for each APO, like this: Cnt[APO] = x.
+ // Since we only have two APOs, namely true and false, we can avoid using
+ // a map. Instead we can simply count the number of operands that
+ // correspond to one of them (in this case the 'true' APO), and calculate
+ // the other by subtracting it from the total number of operands.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
+ if (getData(OpIdx, Lane).APO)
+ ++CntTrue;
+ unsigned CntFalse = NumOperands - CntTrue;
+ return std::max(CntTrue, CntFalse);
+ }
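
A standalone numeric illustration of this heuristic: an add lane has two operands with the same APO and therefore two freely movable operands, while a sub lane splits into two groups of one, which is why the subtraction lanes end up being chosen as the starting lane in reorder().

#include <algorithm>
#include <cstdio>
#include <vector>

// Count the operands of a lane per APO value and return the size of the
// larger group, i.e. how many operands could be permuted freely.
static unsigned maxReorderable(const std::vector<bool> &LaneAPOs) {
  unsigned CntTrue = 0;
  for (bool APO : LaneAPOs)
    CntTrue += APO;
  unsigned CntFalse = static_cast<unsigned>(LaneAPOs.size()) - CntTrue;
  return std::max(CntTrue, CntFalse);
}

int main() {
  std::printf("add lane: %u\n", maxReorderable({false, false})); // 2
  std::printf("sub lane: %u\n", maxReorderable({false, true}));  // 1
}
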
+
+ /// Go through the instructions in VL and append their operands.
+ void appendOperandsOfVL(ArrayRef<Value *> VL) {
+ assert(!VL.empty() && "Bad VL");
+ assert((empty() || VL.size() == getNumLanes()) &&
+ "Expected same number of lanes");
+ assert(isa<Instruction>(VL[0]) && "Expected instruction");
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+ OpsVec.resize(NumOperands);
+ unsigned NumLanes = VL.size();
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ OpsVec[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+ // Our tree has just 3 nodes: the root and two operands.
+ // It is therefore trivial to get the APO. We only need to check the
+ // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
+ // RHS operand. The LHS operand of both add and sub is never attached
+ // to an inverse operation in the linearized form, therefore its APO
+ // is false. The RHS is true only if VL[Lane] is an inverse operation.
+
+ // Since operand reordering is performed on groups of commutative
+ // operations or alternating sequences (e.g., +, -), we can safely
+ // tell the inverse operations by checking commutativity.
+ bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+ bool APO = (OpIdx == 0) ? false : IsInverseOperation;
+ OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
+ APO, false};
+ }
+ }
+ }
+
+ /// \returns the number of operands.
+ unsigned getNumOperands() const { return OpsVec.size(); }
+
+ /// \returns the number of lanes.
+ unsigned getNumLanes() const { return OpsVec[0].size(); }
+
+ /// \returns the operand value at \p OpIdx and \p Lane.
+ Value *getValue(unsigned OpIdx, unsigned Lane) const {
+ return getData(OpIdx, Lane).V;
+ }
+ /// \returns true if the data structure is empty.
+ bool empty() const { return OpsVec.empty(); }
+
+ /// Clears the data.
+ void clear() { OpsVec.clear(); }
+
+ /// \returns true if there are enough operands identical to \p Op to fill
+ /// the whole vector.
+ /// Note: This modifies the 'IsUsed' flag, so a clearUsed() must follow.
+ bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
+ bool OpAPO = getData(OpIdx, Lane).APO;
+ for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ // This is set to true if we found a candidate for broadcast at Lane.
+ bool FoundCandidate = false;
+ for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
+ OperandData &Data = getData(OpI, Ln);
+ if (Data.APO != OpAPO || Data.IsUsed)
+ continue;
+ if (Data.V == Op) {
+ FoundCandidate = true;
+ Data.IsUsed = true;
+ break;
+ }
+ }
+ if (!FoundCandidate)
+ return false;
+ }
+ return true;
+ }
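
A simplified standalone version of the same check, ignoring the APO and IsUsed bookkeeping: a value is a broadcast candidate only if it can be found among the operands of every other lane.

#include <cstdio>
#include <vector>

// Simplified check (no APO or IsUsed bookkeeping): 'Op' can feed the whole
// bundle as a broadcast only if every other lane also has it as an operand.
static bool isBroadcastCandidate(int Op, unsigned Lane,
                                 const std::vector<std::vector<int>> &Ops) {
  for (unsigned Ln = 0; Ln != Ops.size(); ++Ln) {
    if (Ln == Lane)
      continue;
    bool Found = false;
    for (int Candidate : Ops[Ln])
      if (Candidate == Op) {
        Found = true;
        break;
      }
    if (!Found)
      return false;
  }
  return true;
}

int main() {
  // Operand values per lane: 7 appears in every lane, 3 only in lane 0.
  std::vector<std::vector<int>> Ops = {{7, 3}, {5, 7}, {7, 9}};
  std::printf("7 -> %d, 3 -> %d\n", isBroadcastCandidate(7, 0, Ops),
              isBroadcastCandidate(3, 0, Ops));
}
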
+
+ public:
+ /// Initialize with all the operands of the instruction vector \p RootVL.
+ VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
+ ScalarEvolution &SE)
+ : DL(DL), SE(SE) {
+ // Append all the operands of RootVL.
+ appendOperandsOfVL(RootVL);
+ }
+
+ /// \returns a value vector with the operands across all lanes for the
+ /// operand at \p OpIdx.
+ ValueList getVL(unsigned OpIdx) const {
+ ValueList OpVL(OpsVec[OpIdx].size());
+ assert(OpsVec[OpIdx].size() == getNumLanes() &&
+ "Expected same num of lanes across all operands");
+ for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
+ OpVL[Lane] = OpsVec[OpIdx][Lane].V;
+ return OpVL;
+ }
+
+ // Performs operand reordering for 2 or more operands.
+ // The original operands are in OpsVec[OpIdx][Lane].
+ // The reordered operands are written back into OpsVec[OpIdx][Lane].
+ void reorder() {
+ unsigned NumOperands = getNumOperands();
+ unsigned NumLanes = getNumLanes();
+ // Each operand has its own mode. We are using this mode to help us select
+ // the instructions for each lane, so that they match best with the ones
+ // we have selected so far.
+ SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
+
+ // This is a greedy single-pass algorithm. We are going over each lane
+ // once and deciding on the best order right away with no back-tracking.
+ // However, in order to increase its effectiveness, we start with the lane
+ // that has operands that can move the least. For example, given the
+ // following lanes:
+ // Lane 0 : A[0] = B[0] + C[0] // Visited 3rd
+ // Lane 1 : A[1] = C[1] - B[1] // Visited 1st
+ // Lane 2 : A[2] = B[2] + C[2] // Visited 2nd
+ // Lane 3 : A[3] = C[3] - B[3] // Visited 4th
+ // we will start at Lane 1, since the operands of the subtraction cannot
+ // be reordered. Then we will visit the rest of the lanes in a circular
+ // fashion. That is, Lane 2, then Lane 0, and finally Lane 3.
+
+ // Find the first lane that we will start our search from.
+ unsigned FirstLane = getBestLaneToStartReordering();
+
+ // Initialize the modes.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ Value *OpLane0 = getValue(OpIdx, FirstLane);
+ // Keep track if we have instructions with all the same opcode on one
+ // side.
+ if (isa<LoadInst>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Load;
+ else if (isa<Instruction>(OpLane0)) {
+ // Check if OpLane0 should be broadcast.
+ if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+ ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else
+ ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+ } else if (isa<Constant>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Constant;
+ else if (isa<Argument>(OpLane0))
+ // Our best hope is a Splat. It may save some cost in some cases.
+ ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else
+ // NOTE: This should be unreachable.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ }
+
+ // If the initial strategy fails for any of the operand indexes, then we
+ // perform reordering again in a second pass. This helps avoid assigning
+ // high priority to the failed strategy, and should improve reordering for
+ // the non-failed operand indexes.
+ for (int Pass = 0; Pass != 2; ++Pass) {
+ // Skip the second pass if the first pass did not fail.
+ bool StrategyFailed = false;
+ // Mark all operand data as free to use.
+ clearUsed();
+ // We keep the original operand order for the FirstLane, so reorder the
+ // rest of the lanes. We are visiting the nodes in a circular fashion,
+ // using FirstLane as the center point and increasing the radius
+ // distance.
+ for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
+ // Visit the lane on the right and then the lane on the left.
+ for (int Direction : {+1, -1}) {
+ int Lane = FirstLane + Direction * Distance;
+ if (Lane < 0 || Lane >= (int)NumLanes)
+ continue;
+ int LastLane = Lane - Direction;
+ assert(LastLane >= 0 && LastLane < (int)NumLanes &&
+ "Out of bounds");
+ // Look for a good match for each operand.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ // Search for the operand that best matches the operand at (OpIdx, LastLane).
+ Optional<unsigned> BestIdx =
+ getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
+ // By not selecting a value, we allow the operands that follow to
+ // select a better matching value. We will get a non-null value in
+ // the next run of getBestOperand().
+ if (BestIdx) {
+ // Swap the current operand with the one returned by
+ // getBestOperand().
+ swap(OpIdx, BestIdx.getValue(), Lane);
+ } else {
+ // We failed to find a best operand, set mode to 'Failed'.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ // Enable the second pass.
+ StrategyFailed = true;
+ }
+ }
+ }
+ }
+ // Skip second pass if the strategy did not fail.
+ if (!StrategyFailed)
+ break;
+ }
+ }
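
The circular visiting order from the comment above can be reproduced with a few lines; with four lanes and FirstLane = 1 this prints lanes 2, 0 and 3, each reordered against its already-visited neighbour.

#include <cstdio>

int main() {
  // Four lanes, FirstLane = 1 (the subtraction lane from the comment above).
  const int NumLanes = 4, FirstLane = 1;
  std::printf("keep lane %d as is\n", FirstLane);
  for (int Distance = 1; Distance != NumLanes; ++Distance)
    for (int Direction : {+1, -1}) {
      int Lane = FirstLane + Direction * Distance;
      if (Lane < 0 || Lane >= NumLanes)
        continue;
      std::printf("reorder lane %d against lane %d\n", Lane, Lane - Direction);
    }
}
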
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
+ switch (RMode) {
+ case ReorderingMode::Load:
+ return "Load";
+ case ReorderingMode::Opcode:
+ return "Opcode";
+ case ReorderingMode::Constant:
+ return "Constant";
+ case ReorderingMode::Splat:
+ return "Splat";
+ case ReorderingMode::Failed:
+ return "Failed";
+ }
+ llvm_unreachable("Unimplemented Reordering Type");
+ }
+
+ LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
+ raw_ostream &OS) {
+ return OS << getModeStr(RMode);
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
+ printMode(RMode, dbgs());
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
+ return printMode(RMode, OS);
+ }
+
+ LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
+ const unsigned Indent = 2;
+ unsigned Cnt = 0;
+ for (const OperandDataVec &OpDataVec : OpsVec) {
+ OS << "Operand " << Cnt++ << "\n";
+ for (const OperandData &OpData : OpDataVec) {
+ OS.indent(Indent) << "{";
+ if (Value *V = OpData.V)
+ OS << *V;
+ else
+ OS << "null";
+ OS << ", APO:" << OpData.APO << "}\n";
+ }
+ OS << "\n";
+ }
+ return OS;
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+ };
+
+private:
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I) const;
@@ -613,7 +1125,8 @@ private:
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
+ const EdgeInfo &EI);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
@@ -631,12 +1144,12 @@ private:
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
- int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices);
+ int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices) const;
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
- int getGatherCost(ArrayRef<Value *> VL);
+ int getGatherCost(ArrayRef<Value *> VL) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -648,22 +1161,18 @@ private:
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
- bool isFullyVectorizableTinyTree();
+ bool isFullyVectorizableTinyTree() const;
- /// \reorder commutative operands in alt shuffle if they result in
- /// vectorized code.
- void reorderAltShuffleOperands(const InstructionsState &S,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right);
-
- /// \reorder commutative operands to get better probability of
+ /// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
- void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right);
+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const DataLayout &DL,
+ ScalarEvolution &SE);
struct TreeEntry {
- TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
+ using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
+ TreeEntry(VecTreeTy &Container) : Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
@@ -696,20 +1205,103 @@ private:
/// to be a pointer and needs to be able to initialize the child iterator.
/// Thus we need a reference back to the container to translate the indices
/// to entries.
- std::vector<TreeEntry> &Container;
+ VecTreeTy &Container;
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
- SmallVector<int, 1> UserTreeIndices;
+ SmallVector<EdgeInfo, 1> UserTreeIndices;
+
+ /// The index of this treeEntry in VectorizableTree.
+ int Idx = -1;
+
+ private:
+ /// The operands of each instruction in each lane Operands[op_index][lane].
+ /// Note: This helps avoid the replication of the code that performs the
+ /// reordering of operands during buildTree_rec() and vectorizeTree().
+ SmallVector<ValueList, 2> Operands;
+
+ public:
+ /// Set this bundle's \p OpIdx'th operand to \p OpVL.
+ void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
+ ArrayRef<unsigned> ReuseShuffleIndices) {
+ if (Operands.size() < OpIdx + 1)
+ Operands.resize(OpIdx + 1);
+ assert(Operands[OpIdx].size() == 0 && "Already resized?");
+ Operands[OpIdx].resize(Scalars.size());
+ for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
+ Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
+ ? OpVL[ReuseShuffleIndices[Lane]]
+ : OpVL[Lane];
+ }
+
+ /// If there is a user TreeEntry, then set its operand.
+ void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
+ ArrayRef<Value *> OpVL,
+ ArrayRef<unsigned> ReuseShuffleIndices) {
+ if (UserTreeIdx.UserTE)
+ UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
+ ReuseShuffleIndices);
+ }
+
+ /// \returns the \p OpIdx operand of this TreeEntry.
+ ValueList &getOperand(unsigned OpIdx) {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ return Operands[OpIdx];
+ }
+
+ /// \return the single \p OpIdx operand.
+ Value *getSingleOperand(unsigned OpIdx) const {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ assert(!Operands[OpIdx].empty() && "No operand available");
+ return Operands[OpIdx][0];
+ }
+
+#ifndef NDEBUG
+ /// Debug printer.
+ LLVM_DUMP_METHOD void dump() const {
+ dbgs() << Idx << ".\n";
+ for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
+ dbgs() << "Operand " << OpI << ":\n";
+ for (const Value *V : Operands[OpI])
+ dbgs().indent(2) << *V << "\n";
+ }
+ dbgs() << "Scalars: \n";
+ for (Value *V : Scalars)
+ dbgs().indent(2) << *V << "\n";
+ dbgs() << "NeedToGather: " << NeedToGather << "\n";
+ dbgs() << "VectorizedValue: ";
+ if (VectorizedValue)
+ dbgs() << *VectorizedValue;
+ else
+ dbgs() << "NULL";
+ dbgs() << "\n";
+ dbgs() << "ReuseShuffleIndices: ";
+ if (ReuseShuffleIndices.empty())
+ dbgs() << "Emtpy";
+ else
+ for (unsigned Idx : ReuseShuffleIndices)
+ dbgs() << Idx << ", ";
+ dbgs() << "\n";
+ dbgs() << "ReorderIndices: ";
+ for (unsigned Idx : ReorderIndices)
+ dbgs() << Idx << ", ";
+ dbgs() << "\n";
+ dbgs() << "UserTreeIndices: ";
+ for (const auto &EInfo : UserTreeIndices)
+ dbgs() << EInfo << ", ";
+ dbgs() << "\n";
+ }
+#endif
};
/// Create a new VectorizableTree entry.
- void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
- ArrayRef<unsigned> ReorderIndices = None) {
- VectorizableTree.emplace_back(VectorizableTree);
- int idx = VectorizableTree.size() - 1;
- TreeEntry *Last = &VectorizableTree[idx];
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+ const EdgeInfo &UserTreeIdx,
+ ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<unsigned> ReorderIndices = None) {
+ VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
+ TreeEntry *Last = VectorizableTree.back().get();
+ Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->NeedToGather = !Vectorized;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
@@ -718,25 +1310,44 @@ private:
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
- ScalarToTreeEntry[VL[i]] = idx;
+ ScalarToTreeEntry[VL[i]] = Last->Idx;
}
} else {
MustGather.insert(VL.begin(), VL.end());
}
- if (UserTreeIdx >= 0)
+ if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
- UserTreeIdx = idx;
+
+ Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
+ return Last;
}
/// -- Vectorization State --
/// Holds all of the tree entries.
- std::vector<TreeEntry> VectorizableTree;
+ TreeEntry::VecTreeTy VectorizableTree;
+
+#ifndef NDEBUG
+ /// Debug printer.
+ LLVM_DUMP_METHOD void dumpVectorizableTree() const {
+ for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
+ VectorizableTree[Id]->dump();
+ dbgs() << "\n";
+ }
+ }
+#endif
TreeEntry *getTreeEntry(Value *V) {
auto I = ScalarToTreeEntry.find(V);
if (I != ScalarToTreeEntry.end())
- return &VectorizableTree[I->second];
+ return VectorizableTree[I->second].get();
+ return nullptr;
+ }
+
+ const TreeEntry *getTreeEntry(Value *V) const {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return VectorizableTree[I->second].get();
return nullptr;
}
@@ -1246,21 +1857,25 @@ template <> struct GraphTraits<BoUpSLP *> {
/// NodeRef has to be a pointer per the GraphWriter.
using NodeRef = TreeEntry *;
+ using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
+
/// Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
- : public iterator_adaptor_base<ChildIteratorType,
- SmallVector<int, 1>::iterator> {
- std::vector<TreeEntry> &VectorizableTree;
+ : public iterator_adaptor_base<
+ ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
+ ContainerTy &VectorizableTree;
- ChildIteratorType(SmallVector<int, 1>::iterator W,
- std::vector<TreeEntry> &VT)
+ ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
+ ContainerTy &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
- NodeRef operator*() { return &VectorizableTree[*I]; }
+ NodeRef operator*() { return I->UserTE; }
};
- static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
+ static NodeRef getEntryNode(BoUpSLP &R) {
+ return R.VectorizableTree[0].get();
+ }
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
@@ -1272,7 +1887,19 @@ template <> struct GraphTraits<BoUpSLP *> {
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
- using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>;
+ class nodes_iterator {
+ using ItTy = ContainerTy::iterator;
+ ItTy It;
+
+ public:
+ nodes_iterator(const ItTy &It2) : It(It2) {}
+ NodeRef operator*() { return It->get(); }
+ nodes_iterator operator++() {
+ ++It;
+ return *this;
+ }
+ bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
+ };
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
@@ -1331,11 +1958,11 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
- buildTree_rec(Roots, 0, -1);
+ buildTree_rec(Roots, 0, EdgeInfo());
// Collect the values that we need to extract from the tree.
- for (TreeEntry &EIdx : VectorizableTree) {
- TreeEntry *Entry = &EIdx;
+ for (auto &TEPtr : VectorizableTree) {
+ TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->NeedToGather)
@@ -1393,7 +2020,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
- int UserTreeIdx) {
+ const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsState S = getSameOpcode(VL);
@@ -1450,6 +2077,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
+ E->trySetUserTEOperand(UserTreeIdx, VL, None);
return;
}
@@ -1468,8 +2096,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
+ // The reduction nodes (stored in UserIgnoreList) should also stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i])) {
+ if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;
@@ -1548,7 +2177,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -1558,7 +2187,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1571,6 +2200,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
++NumOpsWantToKeepOriginalOrder;
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
ReuseShuffleIndicies);
+ // This is a special case, as it does not gather, but at the same time
+ // we are not extending buildTree_rec() towards the operands.
+ ValueList Op0;
+ Op0.assign(VL.size(), VL0->getOperand(0));
+ VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
if (!CurrentOrder.empty()) {
@@ -1588,6 +2222,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
++StoredCurrentOrderAndNum->getSecond();
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
StoredCurrentOrderAndNum->getFirst());
+ // This is a special case, as it does not gather, but at the same time
+ // we are not extending buildTree_rec() towards the operands.
+ ValueList Op0;
+ Op0.assign(VL.size(), VL0->getOperand(0));
+ VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
@@ -1693,7 +2332,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1702,7 +2341,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1710,10 +2349,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
- if (Cmp->getPredicate() != P0 ||
+ if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -1723,20 +2363,34 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
-
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ ValueList Left, Right;
+ if (cast<CmpInst>(VL0)->isCommutative()) {
+ // Commutative predicate - collect + sort operands of the instructions
+ // so that each side is more likely to have the same opcode.
+ assert(P0 == SwapP0 && "Commutative Predicate mismatch");
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ } else {
+ // Collect operands - commute if it uses the swapped predicate.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ if (Cmp->getPredicate() != P0)
+ std::swap(LHS, RHS);
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
}
+
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
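
A standalone sketch of the non-commutative branch above: lanes whose predicate is the swapped form of P0 have their operands commuted, so that each side of the bundle ends up with matching values. The Pred enum and value names are illustrative only.

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Toy predicates: SLT is the swapped form of SGT (a > b  <=>  b < a).
enum class Pred { SGT, SLT };

struct Cmp { Pred P; std::string LHS, RHS; };

int main() {
  // Compare bundle: (a0 > b0), (b1 < a1), (a2 > b2); target predicate is SGT.
  std::vector<Cmp> VL = {{Pred::SGT, "a0", "b0"},
                         {Pred::SLT, "b1", "a1"},
                         {Pred::SGT, "a2", "b2"}};
  const Pred P0 = VL[0].P;
  std::vector<std::string> Left, Right;
  for (Cmp C : VL) {
    if (C.P != P0)              // lane uses the swapped predicate: commute it
      std::swap(C.LHS, C.RHS);
    Left.push_back(C.LHS);
    Right.push_back(C.RHS);
  }
  for (unsigned i = 0; i != Left.size(); ++i)
    std::printf("lane %u: left=%s right=%s\n", i, Left[i].c_str(),
                Right[i].c_str());
}
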
case Instruction::Select:
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -1754,17 +2408,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+ case Instruction::Xor: {
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(S.getOpcode(), VL, Left, Right);
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
@@ -1774,10 +2428,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
@@ -1815,7 +2469,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
@@ -1823,7 +2477,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1837,14 +2491,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
return;
}
case Instruction::Call: {
@@ -1860,9 +2514,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
Function *Int = CI->getCalledFunction();
- Value *A1I = nullptr;
- if (hasVectorInstrinsicScalarOpd(ID, 1))
- A1I = CI->getArgOperand(1);
+ unsigned NumArgs = CI->getNumArgOperands();
+ SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
+ for (unsigned j = 0; j != NumArgs; ++j)
+ if (hasVectorInstrinsicScalarOpd(ID, j))
+ ScalarArgs[j] = CI->getArgOperand(j);
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -1874,16 +2530,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
<< "\n");
return;
}
- // ctlz,cttz and powi are special intrinsics whose second argument
- // should be same in order for them to be vectorized.
- if (hasVectorInstrinsicScalarOpd(ID, 1)) {
- Value *A1J = CI2->getArgOperand(1);
- if (A1I != A1J) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
- << " argument " << A1I << "!=" << A1J << "\n");
- return;
+ // Some intrinsics have scalar arguments, which must be the same across all
+ // calls in order for them to be vectorized.
+ for (unsigned j = 0; j != NumArgs; ++j) {
+ if (hasVectorInstrinsicScalarOpd(ID, j)) {
+ Value *A1J = CI2->getArgOperand(j);
+ if (ScalarArgs[j] != A1J) {
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+ << " argument " << ScalarArgs[j] << "!=" << A1J
+ << "\n");
+ return;
+ }
}
}
// Verify that the bundle operands are identical between the two calls.
@@ -1899,7 +2558,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1907,11 +2566,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
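
The generalized check above requires every scalar argument position to hold the identical value in all lanes (for example the power argument of powi, mentioned in the removed comment). A standalone sketch of that predicate, with calls modelled as plain argument lists:

#include <cstdio>
#include <vector>

// Calls are modelled as plain argument lists; 'ScalarArgIdx' marks the
// argument positions that must stay scalar and therefore be identical in
// every lane.
static bool scalarArgsMatch(const std::vector<std::vector<int>> &Calls,
                            const std::vector<unsigned> &ScalarArgIdx) {
  for (unsigned Idx : ScalarArgIdx)
    for (const std::vector<int> &Call : Calls)
      if (Call[Idx] != Calls[0][Idx])
        return false;
  return true;
}

int main() {
  // Four powi-like calls: argument 0 is vectorized, argument 1 is the scalar
  // power and has to match across all lanes.
  std::vector<std::vector<int>> SameExp = {{1, 3}, {2, 3}, {4, 3}, {8, 3}};
  std::vector<std::vector<int>> MixedExp = {{1, 3}, {2, 3}, {4, 2}, {8, 3}};
  std::printf("same exponent:  %d\n", scalarArgsMatch(SameExp, {1}));  // 1
  std::printf("mixed exponent: %d\n", scalarArgsMatch(MixedExp, {1})); // 0
}
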
- case Instruction::ShuffleVector:
+ case Instruction::ShuffleVector: {
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
@@ -1920,15 +2579,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
- reorderAltShuffleOperands(S, VL, Left, Right);
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
@@ -1938,10 +2597,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -2223,6 +2882,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
return ReuseShuffleCost + VecCost - ScalarCost;
}
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -2260,7 +2920,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
ConstantInt *CInt0 = nullptr;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
const Instruction *I = cast<Instruction>(VL[i]);
- ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
+ ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
if (!CInt) {
Op2VK = TargetTransformInfo::OK_AnyValue;
Op2VP = TargetTransformInfo::OP_None;
@@ -2413,31 +3074,31 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
}
-bool BoUpSLP::isFullyVectorizableTinyTree() {
+bool BoUpSLP::isFullyVectorizableTinyTree() const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable.\n");
// We only handle trees of heights 1 and 2.
- if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
+ if (VectorizableTree.size() == 1 && !VectorizableTree[0]->NeedToGather)
return true;
if (VectorizableTree.size() != 2)
return false;
// Handle splat and all-constants stores.
- if (!VectorizableTree[0].NeedToGather &&
- (allConstant(VectorizableTree[1].Scalars) ||
- isSplat(VectorizableTree[1].Scalars)))
+ if (!VectorizableTree[0]->NeedToGather &&
+ (allConstant(VectorizableTree[1]->Scalars) ||
+ isSplat(VectorizableTree[1]->Scalars)))
return true;
// Gathering cost would be too much for tiny trees.
- if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ if (VectorizableTree[0]->NeedToGather || VectorizableTree[1]->NeedToGather)
return false;
return true;
}
-bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
@@ -2457,19 +3118,19 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
return true;
}
-int BoUpSLP::getSpillCost() {
+int BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
- unsigned BundleWidth = VectorizableTree.front().Scalars.size();
+ unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
int Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
Instruction *PrevInst = nullptr;
- for (const auto &N : VectorizableTree) {
- Instruction *Inst = dyn_cast<Instruction>(N.Scalars[0]);
+ for (const auto &TEPtr : VectorizableTree) {
+ Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
@@ -2494,6 +3155,7 @@ int BoUpSLP::getSpillCost() {
});
// Now find the sequence of instructions between PrevInst and Inst.
+ unsigned NumCalls = 0;
BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
PrevInstIt =
PrevInst->getIterator().getReverse();
@@ -2506,16 +3168,19 @@ int BoUpSLP::getSpillCost() {
// Debug information doesn't impact spill cost.
if ((isa<CallInst>(&*PrevInstIt) &&
!isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
- &*PrevInstIt != PrevInst) {
- SmallVector<Type*, 4> V;
- for (auto *II : LiveValues)
- V.push_back(VectorType::get(II->getType(), BundleWidth));
- Cost += TTI->getCostOfKeepingLiveOverCall(V);
- }
+ &*PrevInstIt != PrevInst)
+ NumCalls++;
++PrevInstIt;
}
+ if (NumCalls) {
+ SmallVector<Type*, 4> V;
+ for (auto *II : LiveValues)
+ V.push_back(VectorType::get(II->getType(), BundleWidth));
+ Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
+ }
+
PrevInst = Inst;
}
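
The refactored loop above first counts the calls in the window and then charges the live-range cost once, instead of querying TTI per call; the arithmetic is equivalent, as this trivial standalone example shows.

#include <cstdio>

int main() {
  // Three call sites between two bundled instructions, each costing 10 to
  // keep the live vectors across; one multiplication replaces three separate
  // cost-model queries.
  const int NumCalls = 3, CostOfKeepingLive = 10;
  int Cost = 0;
  Cost += NumCalls * CostOfKeepingLive;
  std::printf("spill cost for this window: %d\n", Cost); // 30
}
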
@@ -2527,10 +3192,10 @@ int BoUpSLP::getTreeCost() {
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
- unsigned BundleWidth = VectorizableTree[0].Scalars.size();
+ unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
- TreeEntry &TE = VectorizableTree[I];
+ TreeEntry &TE = *VectorizableTree[I].get();
// We create duplicate tree entries for gather sequences that have multiple
// uses. However, we should not compute the cost of duplicate sequences.
@@ -2545,10 +3210,11 @@ int BoUpSLP::getTreeCost() {
// existing heuristics based on tree size may yield different results.
//
if (TE.NeedToGather &&
- std::any_of(std::next(VectorizableTree.begin(), I + 1),
- VectorizableTree.end(), [TE](TreeEntry &Entry) {
- return Entry.NeedToGather && Entry.isSame(TE.Scalars);
- }))
+ std::any_of(
+ std::next(VectorizableTree.begin(), I + 1), VectorizableTree.end(),
+ [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
+ return EntryPtr->NeedToGather && EntryPtr->isSame(TE.Scalars);
+ }))
continue;
int C = getEntryCost(&TE);
@@ -2575,7 +3241,7 @@ int BoUpSLP::getTreeCost() {
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
- auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto Extend =
@@ -2608,17 +3274,17 @@ int BoUpSLP::getTreeCost() {
}
int BoUpSLP::getGatherCost(Type *Ty,
- const DenseSet<unsigned> &ShuffledIndices) {
+ const DenseSet<unsigned> &ShuffledIndices) const {
int Cost = 0;
for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
if (!ShuffledIndices.count(i))
Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
if (!ShuffledIndices.empty())
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
+ Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
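
A self-contained sketch of the gather-cost formula above, in plain C++ with unit costs standing in for the TTI insert and shuffle queries (real costs are target-dependent): each lane not covered by the reuse shuffle pays one insertelement, and a single SK_PermuteSingleSrc shuffle is paid if any lane is reused.

#include <cstdio>
#include <set>

// Unit costs stand in for TTI->getVectorInstrCost and TTI->getShuffleCost.
static int gatherCost(unsigned NumElts, const std::set<unsigned> &ShuffledIndices,
                      int InsertCost = 1, int PermuteCost = 1) {
  int Cost = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (!ShuffledIndices.count(i))
      Cost += InsertCost; // insertelement for a lane built from scratch
  if (!ShuffledIndices.empty())
    Cost += PermuteCost; // one SK_PermuteSingleSrc covers all reused lanes
  return Cost;
}

int main() {
  // 4-wide gather where lane 2 comes from the reuse shuffle:
  // 3 inserts + 1 permute = 4 with unit costs.
  std::printf("%d\n", gatherCost(4, {2}));
}
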
-int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
+int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -2638,221 +3304,19 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
return getGatherCost(VecTy, ShuffledElements);
}
-// Reorder commutative operations in alternate shuffle if the resulting vectors
-// are consecutive loads. This would allow us to vectorize the tree.
-// If we have something like-
-// load a[0] - load b[0]
-// load b[1] + load a[1]
-// load a[2] - load b[2]
-// load a[3] + load b[3]
-// Reordering the second load b[1] load a[1] would allow us to vectorize this
-// code.
-void BoUpSLP::reorderAltShuffleOperands(const InstructionsState &S,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
- // Push left and right operands of binary operation into Left and Right
- for (Value *V : VL) {
- auto *I = cast<Instruction>(V);
- assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
- Left.push_back(I->getOperand(0));
- Right.push_back(I->getOperand(1));
- }
-
- // Reorder if we have a commutative operation and consecutive access
- // are on either side of the alternate instructions.
- for (unsigned j = 0; j < VL.size() - 1; ++j) {
- if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- Instruction *VL1 = cast<Instruction>(VL[j]);
- Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j], Right[j]);
- continue;
- } else if (VL2->isCommutative() &&
- isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- // else unchanged
- }
- }
- if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- Instruction *VL1 = cast<Instruction>(VL[j]);
- Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j], Right[j]);
- continue;
- } else if (VL2->isCommutative() &&
- isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- // else unchanged
- }
- }
- }
-}
-
-// Return true if I should be commuted before adding it's left and right
-// operands to the arrays Left and Right.
-//
-// The vectorizer is trying to either have all elements one side being
-// instruction with the same opcode to enable further vectorization, or having
-// a splat to lower the vectorizing cost.
-static bool shouldReorderOperands(
- int i, unsigned Opcode, Instruction &I, ArrayRef<Value *> Left,
- ArrayRef<Value *> Right, bool AllSameOpcodeLeft, bool AllSameOpcodeRight,
- bool SplatLeft, bool SplatRight, Value *&VLeft, Value *&VRight) {
- VLeft = I.getOperand(0);
- VRight = I.getOperand(1);
- // If we have "SplatRight", try to see if commuting is needed to preserve it.
- if (SplatRight) {
- if (VRight == Right[i - 1])
- // Preserve SplatRight
- return false;
- if (VLeft == Right[i - 1]) {
- // Commuting would preserve SplatRight, but we don't want to break
- // SplatLeft either, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (SplatLeft && VLeft == Left[i - 1])
- return false;
- return true;
- }
- }
- // Symmetrically handle Right side.
- if (SplatLeft) {
- if (VLeft == Left[i - 1])
- // Preserve SplatLeft
- return false;
- if (VRight == Left[i - 1])
- return true;
- }
-
- Instruction *ILeft = dyn_cast<Instruction>(VLeft);
- Instruction *IRight = dyn_cast<Instruction>(VRight);
-
- // If we have "AllSameOpcodeRight", try to see if the left operands preserves
- // it and not the right, in this case we want to commute.
- if (AllSameOpcodeRight) {
- unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
- if (IRight && RightPrevOpcode == IRight->getOpcode())
- // Do not commute, a match on the right preserves AllSameOpcodeRight
- return false;
- if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
- // We have a match and may want to commute, but first check if there is
- // not also a match on the existing operands on the Left to preserve
- // AllSameOpcodeLeft, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (AllSameOpcodeLeft && ILeft &&
- cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
- return false;
- return true;
- }
- }
- // Symmetrically handle Left side.
- if (AllSameOpcodeLeft) {
- unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
- if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
- return false;
- if (IRight && LeftPrevOpcode == IRight->getOpcode())
- return true;
- }
- return false;
-}
-
-void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
- if (!VL.empty()) {
- // Peel the first iteration out of the loop since there's nothing
- // interesting to do anyway and it simplifies the checks in the loop.
- auto *I = cast<Instruction>(VL[0]);
- Value *VLeft = I->getOperand(0);
- Value *VRight = I->getOperand(1);
- if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
- // Favor having instruction to the right. FIXME: why?
- std::swap(VLeft, VRight);
- Left.push_back(VLeft);
- Right.push_back(VRight);
- }
-
- // Keep track if we have instructions with all the same opcode on one side.
- bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
- bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
- // Keep track if we have one side with all the same value (broadcast).
- bool SplatLeft = true;
- bool SplatRight = true;
-
- for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- assert(((I->getOpcode() == Opcode && I->isCommutative()) ||
- (I->getOpcode() != Opcode && Instruction::isCommutative(Opcode))) &&
- "Can only process commutative instruction");
- // Commute to favor either a splat or maximizing having the same opcodes on
- // one side.
- Value *VLeft;
- Value *VRight;
- if (shouldReorderOperands(i, Opcode, *I, Left, Right, AllSameOpcodeLeft,
- AllSameOpcodeRight, SplatLeft, SplatRight, VLeft,
- VRight)) {
- Left.push_back(VRight);
- Right.push_back(VLeft);
- } else {
- Left.push_back(VLeft);
- Right.push_back(VRight);
- }
- // Update Splat* and AllSameOpcode* after the insertion.
- SplatRight = SplatRight && (Right[i - 1] == Right[i]);
- SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
- AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
- (cast<Instruction>(Left[i - 1])->getOpcode() ==
- cast<Instruction>(Left[i])->getOpcode());
- AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
- (cast<Instruction>(Right[i - 1])->getOpcode() ==
- cast<Instruction>(Right[i])->getOpcode());
- }
-
- // If one operand end up being broadcast, return this operand order.
- if (SplatRight || SplatLeft)
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
+void BoUpSLP::reorderInputsAccordingToOpcode(
+ ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right, const DataLayout &DL,
+ ScalarEvolution &SE) {
+ if (VL.empty())
return;
-
- // Finally check if we can get longer vectorizable chain by reordering
- // without breaking the good operand order detected above.
- // E.g. If we have something like-
- // load a[0] load b[0]
- // load b[1] load a[1]
- // load a[2] load b[2]
- // load a[3] load b[3]
- // Reordering the second load b[1] load a[1] would allow us to vectorize
- // this code and we still retain AllSameOpcode property.
- // FIXME: This load reordering might break AllSameOpcode in some rare cases
- // such as-
- // add a[0],c[0] load b[0]
- // add a[1],c[2] load b[1]
- // b[2] load b[2]
- // add a[3],c[3] load b[3]
- for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
- if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- }
- }
- if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- }
- }
- // else unchanged
- }
+ VLOperands Ops(VL, DL, SE);
+ // Reorder the operands in place.
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
}
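
The rewritten helper delegates per-lane swapping to VLOperands (defined earlier in this patch); what the reordering is after is easiest to see on a toy example. Below is a sketch in plain C++ with strings standing in for the scalar operands; the base-name check is a toy heuristic, whereas the real code works on llvm::Value* and uses opcode and consecutive-load heuristics.

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Lane i holds the two operands of scalar i, e.g. a[0]+b[0], b[1]+a[1], ...
  std::vector<std::string> Left  = {"a[0]", "b[1]", "a[2]", "b[3]"};
  std::vector<std::string> Right = {"b[0]", "a[1]", "b[2]", "a[3]"};
  for (size_t i = 0; i < Left.size(); ++i)
    if (Left[i][0] != Left[0][0]) // toy heuristic: group lanes by base array
      std::swap(Left[i], Right[i]);
  for (size_t i = 0; i < Left.size(); ++i)
    std::printf("lane %zu: %s | %s\n", i, Left[i].c_str(), Right[i].c_str());
  // Left becomes a[0..3], Right becomes b[0..3]: both sides are now
  // consecutive loads and can each be vectorized.
}
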
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
@@ -3082,13 +3546,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
continue;
}
- // Prepare the operand vector.
- for (Value *V : E->Scalars)
- Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
-
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeTree(Operands);
+ Value *Vec = vectorizeTree(E->getOperand(i));
NewPhi->addIncoming(Vec, IBB);
}
@@ -3099,7 +3559,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::ExtractElement: {
if (!E->NeedToGather) {
- Value *V = VL0->getOperand(0);
+ Value *V = E->getSingleOperand(0);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
inversePermutation(E->ReorderIndices, Mask);
@@ -3132,11 +3592,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ExtractValue: {
if (!E->NeedToGather) {
- LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
+ LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
- LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
+ LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlignment());
Value *NewV = propagateMetadata(V, E->Scalars);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
@@ -3177,13 +3637,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- ValueList INVL;
- for (Value *V : E->Scalars)
- INVL.push_back(cast<Instruction>(V)->getOperand(0));
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *InVec = vectorizeTree(INVL);
+ Value *InVec = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3202,16 +3658,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::FCmp:
case Instruction::ICmp: {
- ValueList LHSV, RHSV;
- for (Value *V : E->Scalars) {
- LHSV.push_back(cast<Instruction>(V)->getOperand(0));
- RHSV.push_back(cast<Instruction>(V)->getOperand(1));
- }
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *L = vectorizeTree(LHSV);
- Value *R = vectorizeTree(RHSV);
+ Value *L = vectorizeTree(E->getOperand(0));
+ Value *R = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3235,31 +3685,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Select: {
- ValueList TrueVec, FalseVec, CondVec;
- for (Value *V : E->Scalars) {
- CondVec.push_back(cast<Instruction>(V)->getOperand(0));
- TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
- FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
+ setInsertPointAfterBundle(E->Scalars, S);
+
+ Value *Cond = vectorizeTree(E->getOperand(0));
+ Value *True = vectorizeTree(E->getOperand(1));
+ Value *False = vectorizeTree(E->getOperand(2));
+
+ if (E->VectorizedValue) {
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ return E->VectorizedValue;
}
+ Value *V = Builder.CreateSelect(Cond, True, False);
+ if (NeedToShuffleReuses) {
+ V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
+ E->ReuseShuffleIndices, "shuffle");
+ }
+ E->VectorizedValue = V;
+ ++NumVectorInstructions;
+ return V;
+ }
+ case Instruction::FNeg: {
setInsertPointAfterBundle(E->Scalars, S);
- Value *Cond = vectorizeTree(CondVec);
- Value *True = vectorizeTree(TrueVec);
- Value *False = vectorizeTree(FalseVec);
+ Value *Op = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *V = Builder.CreateSelect(Cond, True, False);
+ Value *V = Builder.CreateUnOp(
+ static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+ propagateIRFlags(V, E->Scalars, VL0);
+ if (auto *I = dyn_cast<Instruction>(V))
+ V = propagateMetadata(I, E->Scalars);
+
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
E->VectorizedValue = V;
++NumVectorInstructions;
+
return V;
}
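
The new FNeg case follows the same pattern as the other cases in this function: vectorize the operand bundle obtained from E->getOperand(0) and emit one vector instruction with IRBuilder::CreateUnOp. A minimal standalone sketch of that builder call outside the pass (module, function, and value names are made up for illustration) that emits roughly what the case above produces for a 4-wide bundle:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("fneg-sketch", Ctx);
  auto *VecTy = VectorType::get(Type::getFloatTy(Ctx), 4);
  auto *FnTy = FunctionType::get(VecTy, {VecTy}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FnTy, Function::ExternalLinkage, "negate4", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);
  // One vector fneg stands in for four scalar fnegs.
  Value *V =
      Builder.CreateUnOp(Instruction::FNeg, &*F->arg_begin(), "vec.fneg");
  Builder.CreateRet(V);
  M.print(outs(), nullptr);
  return 0;
}
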
case Instruction::Add:
@@ -3280,21 +3748,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- ValueList LHSVL, RHSVL;
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
- reorderInputsAccordingToOpcode(S.getOpcode(), E->Scalars, LHSVL,
- RHSVL);
- else
- for (Value *V : E->Scalars) {
- auto *I = cast<Instruction>(V);
- LHSVL.push_back(I->getOperand(0));
- RHSVL.push_back(I->getOperand(1));
- }
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *LHS = vectorizeTree(LHSVL);
- Value *RHS = vectorizeTree(RHSVL);
+ Value *LHS = vectorizeTree(E->getOperand(0));
+ Value *RHS = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3341,7 +3798,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
unsigned Alignment = LI->getAlignment();
- LI = Builder.CreateLoad(VecPtr);
+ LI = Builder.CreateLoad(VecTy, VecPtr);
if (!Alignment) {
Alignment = DL->getABITypeAlignment(ScalarLoadTy);
}
@@ -3367,13 +3824,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = SI->getAlignment();
unsigned AS = SI->getPointerAddressSpace();
- ValueList ScalarStoreValues;
- for (Value *V : E->Scalars)
- ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *VecValue = vectorizeTree(ScalarStoreValues);
+ Value *VecValue = vectorizeTree(E->getOperand(0));
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
@@ -3400,20 +3853,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::GetElementPtr: {
setInsertPointAfterBundle(E->Scalars, S);
- ValueList Op0VL;
- for (Value *V : E->Scalars)
- Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
-
- Value *Op0 = vectorizeTree(Op0VL);
+ Value *Op0 = vectorizeTree(E->getOperand(0));
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
- ValueList OpVL;
- for (Value *V : E->Scalars)
- OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
-
- Value *OpVec = vectorizeTree(OpVL);
+ Value *OpVec = vectorizeTree(E->getOperand(j));
OpVecs.push_back(OpVec);
}
@@ -3443,20 +3888,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
- // ctlz,cttz and powi are special intrinsics whose second argument is
- // a scalar. This argument should not be vectorized.
- if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+ // Some intrinsics have scalar arguments. Such arguments should not be
+ // vectorized.

+ if (hasVectorInstrinsicScalarOpd(IID, j)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(j);
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
- for (Value *V : E->Scalars) {
- CallInst *CEI = cast<CallInst>(V);
- OpVL.push_back(CEI->getArgOperand(j));
- }
- Value *OpVec = vectorizeTree(OpVL);
+ Value *OpVec = vectorizeTree(E->getOperand(j));
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
@@ -3485,7 +3926,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ShuffleVector: {
- ValueList LHSVL, RHSVL;
assert(S.isAltShuffle() &&
((Instruction::isBinaryOp(S.getOpcode()) &&
Instruction::isBinaryOp(S.getAltOpcode())) ||
@@ -3495,16 +3935,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS, *RHS;
if (Instruction::isBinaryOp(S.getOpcode())) {
- reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(LHSVL);
- RHS = vectorizeTree(RHSVL);
+ LHS = vectorizeTree(E->getOperand(0));
+ RHS = vectorizeTree(E->getOperand(1));
} else {
- ValueList INVL;
- for (Value *V : E->Scalars)
- INVL.push_back(cast<Instruction>(V)->getOperand(0));
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(INVL);
+ LHS = vectorizeTree(E->getOperand(0));
}
if (E->VectorizedValue) {
@@ -3578,20 +4014,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
Builder.SetInsertPoint(&F->getEntryBlock().front());
- auto *VectorRoot = vectorizeTree(&VectorizableTree[0]);
+ auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
// If the vectorized tree can be rewritten in a smaller type, we truncate the
// vectorized root. InstCombine will then rewrite the entire expression. We
// sign extend the extracted values below.
- auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
if (auto *I = dyn_cast<Instruction>(VectorRoot))
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
- auto BundleWidth = VectorizableTree[0].Scalars.size();
+ auto BundleWidth = VectorizableTree[0]->Scalars.size();
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = VectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
- VectorizableTree[0].VectorizedValue = Trunc;
+ VectorizableTree[0]->VectorizedValue = Trunc;
}
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
@@ -3687,8 +4123,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
// For each vectorized value:
- for (TreeEntry &EIdx : VectorizableTree) {
- TreeEntry *Entry = &EIdx;
+ for (auto &TEPtr : VectorizableTree) {
+ TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->NeedToGather)
@@ -3721,7 +4157,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
Builder.ClearInsertionPoint();
- return VectorizableTree[0].VectorizedValue;
+ return VectorizableTree[0]->VectorizedValue;
}
void BoUpSLP::optimizeGatherSequence() {
@@ -3767,10 +4203,10 @@ void BoUpSLP::optimizeGatherSequence() {
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
- std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
- [this](const DomTreeNode *A, const DomTreeNode *B) {
- return DT->properlyDominates(A, B);
- });
+ llvm::stable_sort(CSEWorkList,
+ [this](const DomTreeNode *A, const DomTreeNode *B) {
+ return DT->properlyDominates(A, B);
+ });
// Perform O(N^2) search over the gather sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
@@ -3989,7 +4425,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
<< "\n");
return true;
}
- UpIter++;
+ ++UpIter;
}
if (DownIter != LowerEnd) {
if (&*DownIter == I) {
@@ -4003,7 +4439,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
<< "\n");
return true;
}
- DownIter++;
+ ++DownIter;
}
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
"instruction not found in block");
@@ -4253,7 +4689,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
BS->ScheduleStart = nullptr;
}
-unsigned BoUpSLP::getVectorElementSize(Value *V) {
+unsigned BoUpSLP::getVectorElementSize(Value *V) const {
// If V is a store, just return the width of the stored value without
// traversing the expression tree. This is the common case.
if (auto *Store = dyn_cast<StoreInst>(V))
@@ -4390,7 +4826,7 @@ void BoUpSLP::computeMinimumValueSizes() {
return;
// We only attempt to truncate integer expressions.
- auto &TreeRoot = VectorizableTree[0].Scalars;
+ auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
if (!TreeRootIT)
return;
@@ -4411,8 +4847,8 @@ void BoUpSLP::computeMinimumValueSizes() {
// Collect the scalar values of the vectorizable expression. We will use this
// context to determine which values can be demoted. If we see a truncation,
// we mark it as seeding another demotion.
- for (auto &Entry : VectorizableTree)
- Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end());
+ for (auto &EntryPtr : VectorizableTree)
+ Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
// Ensure the roots of the vectorizable tree don't form a cycle. They must
// have a single external user that is not in the vectorizable tree.
@@ -4746,38 +5182,29 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
- // Do a quadratic search on all of the given stores in reverse order and find
- // all of the pairs of stores that follow each other.
- SmallVector<unsigned, 16> IndexQueue;
- unsigned E = Stores.size();
- IndexQueue.resize(E - 1);
- for (unsigned I = E; I > 0; --I) {
- unsigned Idx = I - 1;
- // If a store has multiple consecutive store candidates, search Stores
- // array according to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
- // This is because usually pairing with immediate succeeding or preceding
- // candidate create the best chance to find slp vectorization opportunity.
- unsigned Offset = 1;
- unsigned Cnt = 0;
- for (unsigned J = 0; J < E - 1; ++J, ++Offset) {
- if (Idx >= Offset) {
- IndexQueue[Cnt] = Idx - Offset;
- ++Cnt;
- }
- if (Idx + Offset < E) {
- IndexQueue[Cnt] = Idx + Offset;
- ++Cnt;
- }
- }
+ auto &&FindConsecutiveAccess =
+ [this, &Stores, &Heads, &Tails, &ConsecutiveChain] (int K, int Idx) {
+ if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
+ return false;
- for (auto K : IndexQueue) {
- if (isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) {
Tails.insert(Stores[Idx]);
Heads.insert(Stores[K]);
ConsecutiveChain[Stores[K]] = Stores[Idx];
+ return true;
+ };
+
+ // Do a quadratic search on all of the given stores in reverse order and find
+ // all of the pairs of stores that follow each other.
+ int E = Stores.size();
+ for (int Idx = E - 1; Idx >= 0; --Idx) {
+ // If a store has multiple consecutive store candidates, search according
+ // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
+ // This is because pairing with the immediately succeeding or preceding
+ // candidate usually creates the best chance to find an SLP vectorization opportunity.
+ for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset)
+ if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
+ (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
break;
- }
- }
}
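
The candidate order produced by the Offset loop above is worth spelling out. A standalone sketch in plain C++ that just prints the probe sequence for a given store index (toy indices, independent of the pass; the real loop stops at the first consecutive partner found):

#include <algorithm>
#include <cstdio>
#include <vector>

// Probe order for store Idx among E stores: Idx-1, Idx+1, Idx-2, Idx+2, ...
static std::vector<int> probeOrder(int Idx, int E) {
  std::vector<int> Order;
  for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset) {
    if (Idx >= Offset)
      Order.push_back(Idx - Offset);
    if (Idx + Offset < E)
      Order.push_back(Idx + Offset);
  }
  return Order;
}

int main() {
  for (int K : probeOrder(/*Idx=*/2, /*E=*/6))
    std::printf("%d ", K); // prints: 1 3 0 4 5
  std::printf("\n");
}
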
// For stores that start but don't end a link in the chain:
@@ -5740,6 +6167,9 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
+
+ // FIXME: Fast-math-flags should be set based on the instructions in the
+ // reduction (not all of 'fast' are required).
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
@@ -5929,10 +6359,14 @@ private:
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
- if (!IsPairwiseReduction)
+ if (!IsPairwiseReduction) {
+ // FIXME: The builder should use an FMF guard. It should not be hard-coded
+ // to 'fast'.
+ assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
return createSimpleTargetReduction(
Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
ReductionData.getFlags(), ReductionOps.back());
+ }
Value *TmpVec = VectorizedValue;
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
@@ -6256,7 +6690,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
// Sort by type.
- std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+ llvm::stable_sort(Incoming, PhiTypeSorterFunc);
// Try to vectorize elements base on their type.
for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
@@ -6297,7 +6731,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
SmallVector<WeakVH, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
diff --git a/lib/Transforms/Vectorize/VPRecipeBuilder.h b/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 15d38ac9c84c..0ca6a6b93cfd 100644
--- a/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -1,9 +1,8 @@
//===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,9 +29,6 @@ class VPRecipeBuilder {
/// Target Library Info.
const TargetLibraryInfo *TLI;
- /// Target Transform Info.
- const TargetTransformInfo *TTI;
-
/// The legality analysis.
LoopVectorizationLegality *Legal;
@@ -105,11 +101,9 @@ public:
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM, VPBuilder &Builder)
- : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
- Builder(Builder) {}
+ : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
/// Check if a recipe can be created for \p I within the given VF \p Range.
/// If a recipe can be created, it adds it to \p VPBB.
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index 05a5400beb4e..517d759d7bfc 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -1,9 +1,8 @@
//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -374,10 +373,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
- BasicBlock *VectorLatchBB = VectorHeaderBB;
// 1. Make room to generate basic-blocks inside loop body if needed.
- VectorLatchBB = VectorHeaderBB->splitBasicBlock(
+ BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
Loop *L = State->LI->getLoopFor(VectorHeaderBB);
L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
@@ -561,6 +559,19 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
bumpIndent(1);
OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
bumpIndent(1);
+
+ // Dump the block predicate.
+ const VPValue *Pred = BasicBlock->getPredicate();
+ if (Pred) {
+ OS << " +\n" << Indent << " \"BlockPredicate: ";
+ if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
+ PredI->printAsOperand(OS);
+ OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
+ << ")\\l\"";
+ } else
+ Pred->printAsOperand(OS);
+ }
+
for (const VPRecipeBase &Recipe : *BasicBlock)
Recipe.print(OS, Indent);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 5c1b4a83c30e..8a06412ad590 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -1,9 +1,8 @@
//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -353,6 +352,9 @@ private:
/// Successor selector, null for zero or single successor blocks.
VPValue *CondBit = nullptr;
+ /// Current block predicate - null if the block does not need a predicate.
+ VPValue *Predicate = nullptr;
+
/// Add \p Successor as the last successor to this block.
void appendSuccessor(VPBlockBase *Successor) {
assert(Successor && "Cannot add nullptr successor!");
@@ -491,6 +493,12 @@ public:
void setCondBit(VPValue *CV) { CondBit = CV; }
+ VPValue *getPredicate() { return Predicate; }
+
+ const VPValue *getPredicate() const { return Predicate; }
+
+ void setPredicate(VPValue *Pred) { Predicate = Pred; }
+
/// Set a given VPBlockBase \p Successor as the single successor of this
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
/// This VPBlockBase must have no successors.
@@ -521,6 +529,15 @@ public:
appendPredecessor(Pred);
}
+ /// Remove all the predecessors of this block.
+ void clearPredecessors() { Predecessors.clear(); }
+
+ /// Remove all the successors of this block and set its condition bit to null.
+ void clearSuccessors() {
+ Successors.clear();
+ CondBit = nullptr;
+ }
+
/// The method which generates the output IR that correspond to this
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(struct VPTransformState *State) = 0;
@@ -1491,6 +1508,41 @@ public:
From->removeSuccessor(To);
To->removePredecessor(From);
}
+
+ /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
+ static bool isBackEdge(const VPBlockBase *FromBlock,
+ const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
+ assert(FromBlock->getParent() == ToBlock->getParent() &&
+ FromBlock->getParent() && "Must be in same region");
+ const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
+ const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
+ if (!FromLoop || !ToLoop || FromLoop != ToLoop)
+ return false;
+
+ // A back-edge is a branch from the loop latch to its header.
+ return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
+ }
+
+ /// Returns true if \p Block is a loop latch.
+ static bool blockIsLoopLatch(const VPBlockBase *Block,
+ const VPLoopInfo *VPLInfo) {
+ if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
+ return ParentVPL->isLoopLatch(Block);
+
+ return false;
+ }
+
+ /// Count and return the number of successors of \p PredBlock excluding any
+ /// backedges.
+ static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
+ VPLoopInfo *VPLI) {
+ unsigned Count = 0;
+ for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
+ if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
+ Count++;
+ }
+ return Count;
+ }
};
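
A toy sketch, in plain C++ on a hypothetical block type, of what countSuccessorsNoBE computes: a successor reached through a latch-to-header back-edge is excluded, so a loop latch that both exits and branches back reports one forward successor. The real code consults VPLoopInfo rather than the crude flags used here.

#include <cstdio>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Name;
  std::vector<ToyBlock *> Succs;
  bool IsLoopLatch = false;
  bool IsLoopHeader = false;
};

// Crude stand-in for VPBlockUtils::countSuccessorsNoBE.
static unsigned countForwardSuccessors(const ToyBlock &B) {
  unsigned Count = 0;
  for (ToyBlock *S : B.Succs)
    if (!(B.IsLoopLatch && S->IsLoopHeader)) // treat latch->header as back-edge
      ++Count;
  return Count;
}

int main() {
  ToyBlock Header{"header", {}, /*IsLoopLatch=*/false, /*IsLoopHeader=*/true};
  ToyBlock Exit{"exit", {}};
  ToyBlock Latch{"latch", {&Header, &Exit}, /*IsLoopLatch=*/true};
  std::printf("%s: %u forward successor(s)\n", Latch.Name.c_str(),
              countForwardSuccessors(Latch)); // prints: latch: 1 ...
}
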
class VPInterleavedAccessInfo {
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 1b81097b6d31..19f5d2c00c60 100644
--- a/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -1,9 +1,8 @@
//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 0f42694e193b..df96f67288f1 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -1,9 +1,8 @@
//===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -64,7 +63,9 @@ private:
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
void fixPhiNodes();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
+#ifndef NDEBUG
bool isExternalDef(Value *Val);
+#endif
VPValue *getOrCreateVPOperand(Value *IRVal);
void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
@@ -119,6 +120,7 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
return VPBB;
}
+#ifndef NDEBUG
// Return true if \p Val is considered an external definition. An external
// definition is either:
// 1. A Value that is not an Instruction. This will be refined in the future.
@@ -154,6 +156,7 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
// Check whether Instruction definition is in loop body.
return !TheLoop->contains(Inst);
}
+#endif
// Create a new VPValue or retrieve an existing one for the Instruction's
// operand \p IRVal. This function must only be used to create/retrieve VPValues
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index 3f11dcb5164d..238ee7e6347c 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -1,9 +1,8 @@
//===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index 3ad7fc7e7b96..7ed7d21b6caa 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -1,9 +1,8 @@
//===-- VPlanHCFGTransforms.cpp - Utility VPlan to VPlan transforms -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
index ae549c6871b3..79a23c33184f 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
@@ -1,9 +1,8 @@
//===- VPlanHCFGTransforms.h - Utility VPlan to VPlan transforms ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
index 5c2485fc2145..5208f2d58e2b 100644
--- a/lib/Transforms/Vectorize/VPlanLoopInfo.h
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -1,9 +1,8 @@
//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.cpp b/lib/Transforms/Vectorize/VPlanPredicator.cpp
new file mode 100644
index 000000000000..7a80f3ff80a5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -0,0 +1,248 @@
+//===-- VPlanPredicator.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlanPredicator.h"
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "VPlanPredicator"
+
+using namespace llvm;
+
+// Generate VPInstructions at the beginning of CurrBB that calculate the
+// predicate being propagated from PredBB to CurrBB depending on the edge type
+// between them. For example if:
+// i. PredBB is controlled by predicate %BP, and
+// ii. The edge PredBB->CurrBB is the false edge, controlled by the condition
+// bit value %CBV then this function will generate the following two
+// VPInstructions at the start of CurrBB:
+// %IntermediateVal = not %CBV
+// %FinalVal = and %BP %IntermediateVal
+// It returns %FinalVal.
+VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB,
+ VPBasicBlock *CurrBB) {
+ VPValue *CBV = PredBB->getCondBit();
+
+ // Set the intermediate value - this is either 'CBV', or 'not CBV'
+ // depending on the edge type.
+ EdgeType ET = getEdgeTypeBetween(PredBB, CurrBB);
+ VPValue *IntermediateVal = nullptr;
+ switch (ET) {
+ case EdgeType::TRUE_EDGE:
+ // CurrBB is the true successor of PredBB - nothing to do here.
+ IntermediateVal = CBV;
+ break;
+
+ case EdgeType::FALSE_EDGE:
+ // CurrBB is the False successor of PredBB - compute not of CBV.
+ IntermediateVal = Builder.createNot(CBV);
+ break;
+ }
+
+ // Now AND intermediate value with PredBB's block predicate if it has one.
+ VPValue *BP = PredBB->getPredicate();
+ if (BP)
+ return Builder.createAnd(BP, IntermediateVal);
+ else
+ return IntermediateVal;
+}
+
+// Generate a tree of ORs for all IncomingPredicates in WorkList.
+// Note: This function destroys the original Worklist.
+//
+// P1 P2 P3 P4 P5
+// \ / \ / /
+// OR1 OR2 /
+// \ | /
+// \ +/-+
+// \ / |
+// OR3 |
+// \ |
+// OR4 <- Returns this
+// |
+//
+// The algorithm uses a worklist of predicates as its main data structure.
+// We pop a pair of values from the front (e.g. P1 and P2), generate an OR
+// (in this example OR1), and push it back. In this example the worklist
+// contains {P3, P4, P5, OR1}.
+// The process iterates until we have only one element in the Worklist (OR4).
+// The last element is the root predicate which is returned.
+VPValue *VPlanPredicator::genPredicateTree(std::list<VPValue *> &Worklist) {
+ if (Worklist.empty())
+ return nullptr;
+
+ // The worklist initially contains all the leaf nodes. Initialize the tree
+ // using them.
+ while (Worklist.size() >= 2) {
+ // Pop a pair of values from the front.
+ VPValue *LHS = Worklist.front();
+ Worklist.pop_front();
+ VPValue *RHS = Worklist.front();
+ Worklist.pop_front();
+
+ // Create an OR of these values.
+ VPValue *Or = Builder.createOr(LHS, RHS);
+
+ // Push OR to the back of the worklist.
+ Worklist.push_back(Or);
+ }
+
+ assert(Worklist.size() == 1 && "Expected 1 item in worklist");
+
+ // The root is the last node in the worklist.
+ VPValue *Root = Worklist.front();
+
+ // This root needs to replace the existing block predicate. This is done in
+ // the caller function.
+ return Root;
+}
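
A self-contained sketch of the worklist algorithm above, in plain C++ with strings standing in for VPValues and string concatenation standing in for Builder.createOr. The real genPredicateTree consumes the caller's worklist in place; this pass-by-value version is only for illustration.

#include <cstdio>
#include <list>
#include <string>

static std::string genPredicateTreeSketch(std::list<std::string> Worklist) {
  if (Worklist.empty())
    return "";
  while (Worklist.size() >= 2) {
    // Pop a pair from the front, OR them, push the result to the back.
    std::string LHS = Worklist.front();
    Worklist.pop_front();
    std::string RHS = Worklist.front();
    Worklist.pop_front();
    Worklist.push_back("(" + LHS + " | " + RHS + ")");
  }
  return Worklist.front(); // the root predicate
}

int main() {
  std::printf("%s\n",
              genPredicateTreeSketch({"P1", "P2", "P3", "P4", "P5"}).c_str());
  // prints: ((P3 | P4) | (P5 | (P1 | P2)))
}
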
+
+// Return whether the edge FromBlock -> ToBlock is a TRUE_EDGE or FALSE_EDGE
+VPlanPredicator::EdgeType
+VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock,
+ VPBlockBase *ToBlock) {
+ unsigned Count = 0;
+ for (VPBlockBase *SuccBlock : FromBlock->getSuccessors()) {
+ if (SuccBlock == ToBlock) {
+ assert(Count < 2 && "Switch not supported currently");
+ return (Count == 0) ? EdgeType::TRUE_EDGE : EdgeType::FALSE_EDGE;
+ }
+ Count++;
+ }
+
+ llvm_unreachable("Broken getEdgeTypeBetween");
+}
+
+// Generate all predicates needed for CurrBlock by going through its immediate
+// predecessor blocks.
+void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock,
+ VPRegionBlock *Region) {
+ // Blocks that dominate region exit inherit the predicate from the region.
+ // Return after setting the predicate.
+ if (VPDomTree.dominates(CurrBlock, Region->getExit())) {
+ VPValue *RegionBP = Region->getPredicate();
+ CurrBlock->setPredicate(RegionBP);
+ return;
+ }
+
+ // Collect all incoming predicates in a worklist.
+ std::list<VPValue *> IncomingPredicates;
+
+ // Set the builder's insertion point to the top of the current BB
+ VPBasicBlock *CurrBB = cast<VPBasicBlock>(CurrBlock->getEntryBasicBlock());
+ Builder.setInsertPoint(CurrBB, CurrBB->begin());
+
+ // For each predecessor, generate the VPInstructions required for
+ // computing 'BP AND (not) CBV" at the top of CurrBB.
+ // Collect the outcome of this calculation for all predecessors
+ // into IncomingPredicates.
+ for (VPBlockBase *PredBlock : CurrBlock->getPredecessors()) {
+ // Skip back-edges
+ if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI))
+ continue;
+
+ VPValue *IncomingPredicate = nullptr;
+ unsigned NumPredSuccsNoBE =
+ VPBlockUtils::countSuccessorsNoBE(PredBlock, VPLI);
+
+ // If there is an unconditional branch to CurrBB, then we don't create
+ // edge predicates. We use the predecessor's block predicate instead.
+ if (NumPredSuccsNoBE == 1)
+ IncomingPredicate = PredBlock->getPredicate();
+ else if (NumPredSuccsNoBE == 2) {
+ // Emit recipes into CurrBlock if required
+ assert(isa<VPBasicBlock>(PredBlock) && "Only BBs have multiple exits");
+ IncomingPredicate =
+ getOrCreateNotPredicate(cast<VPBasicBlock>(PredBlock), CurrBB);
+ } else
+ llvm_unreachable("FIXME: switch statement ?");
+
+ if (IncomingPredicate)
+ IncomingPredicates.push_back(IncomingPredicate);
+ }
+
+ // Logically OR all incoming predicates by building the Predicate Tree.
+ VPValue *Predicate = genPredicateTree(IncomingPredicates);
+
+ // Now update the block's predicate with the new one.
+ CurrBlock->setPredicate(Predicate);
+}
+
+// Generate all predicates needed for Region.
+void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) {
+ VPBasicBlock *EntryBlock = cast<VPBasicBlock>(Region->getEntry());
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(EntryBlock);
+
+ // Generate edge predicates and append them to the block predicate. RPO is
+ // necessary since the predecessor blocks' block predicate needs to be set
+ // before the current block's block predicate can be computed.
+ for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end())) {
+ // TODO: Handle nested regions once we start generating the same.
+ assert(!isa<VPRegionBlock>(Block) && "Nested region not expected");
+ createOrPropagatePredicates(Block, Region);
+ }
+}
+
+// Linearize the CFG within Region.
+// TODO: Predication and linearization need RPOT for every region.
+// This traversal is expensive. Since predication is not adding new
+// blocks, we should be able to compute RPOT once in predication and
+// reuse it here. This becomes even more important once we have nested
+// regions.
+void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) {
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+ VPBlockBase *PrevBlock = nullptr;
+
+ for (VPBlockBase *CurrBlock : make_range(RPOT.begin(), RPOT.end())) {
+ // TODO: Handle nested regions once we start generating the same.
+ assert(!isa<VPRegionBlock>(CurrBlock) && "Nested region not expected");
+
+ // Linearize control flow by adding an unconditional edge between PrevBlock
+ // and CurrBlock skipping loop headers and latches to keep intact loop
+ // header predecessors and loop latch successors.
+ if (PrevBlock && !VPLI->isLoopHeader(CurrBlock) &&
+ !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI)) {
+
+ LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->"
+ << CurrBlock->getName() << "\n");
+
+ PrevBlock->clearSuccessors();
+ CurrBlock->clearPredecessors();
+ VPBlockUtils::connectBlocks(PrevBlock, CurrBlock);
+ }
+
+ PrevBlock = CurrBlock;
+ }
+}
+
+// Entry point. The driver function for the predicator.
+void VPlanPredicator::predicate(void) {
+ // Predicate the blocks within Region.
+ predicateRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
+
+ // Linearize the blocks within Region.
+ linearizeRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
+}
+
+VPlanPredicator::VPlanPredicator(VPlan &Plan)
+ : Plan(Plan), VPLI(&(Plan.getVPLoopInfo())) {
+ // FIXME: Predicator is currently computing the dominator information for the
+ // top region. Once we start storing dominator information in a VPRegionBlock,
+ // we can avoid this recalculation.
+ VPDomTree.recalculate(*(cast<VPRegionBlock>(Plan.getEntry())));
+}
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.h b/lib/Transforms/Vectorize/VPlanPredicator.h
new file mode 100644
index 000000000000..692afd2978d5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.h
@@ -0,0 +1,74 @@
+//===-- VPlanPredicator.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+
+#include "LoopVectorizationPlanner.h"
+#include "VPlan.h"
+#include "VPlanDominatorTree.h"
+
+namespace llvm {
+
+class VPlanPredicator {
+private:
+ enum class EdgeType {
+ TRUE_EDGE,
+ FALSE_EDGE,
+ };
+
+ // VPlan being predicated.
+ VPlan &Plan;
+
+ // VPLoopInfo for Plan's HCFG.
+ VPLoopInfo *VPLI;
+
+ // Dominator tree for Plan's HCFG.
+ VPDominatorTree VPDomTree;
+
+ // VPlan builder used to generate VPInstructions for block predicates.
+ VPBuilder Builder;
+
+ /// Get the type of edge from \p FromBlock to \p ToBlock. Returns TRUE_EDGE if
+ /// \p ToBlock is either the unconditional successor or the conditional true
+ /// successor of \p FromBlock and FALSE_EDGE otherwise.
+ EdgeType getEdgeTypeBetween(VPBlockBase *FromBlock, VPBlockBase *ToBlock);
+
+ /// Create and return VPValue corresponding to the predicate for the edge from
+ /// \p PredBB to \p CurrentBlock.
+ VPValue *getOrCreateNotPredicate(VPBasicBlock *PredBB, VPBasicBlock *CurrBB);
+
+ /// Generate and return the result of ORing all the predicate VPValues in \p
+ /// Worklist.
+ VPValue *genPredicateTree(std::list<VPValue *> &Worklist);
+
+ /// Create or propagate predicate for \p CurrBlock in region \p Region using
+ /// predicate(s) of its predecessor(s).
+ void createOrPropagatePredicates(VPBlockBase *CurrBlock,
+ VPRegionBlock *Region);
+
+ /// Predicate the CFG within \p Region.
+ void predicateRegionRec(VPRegionBlock *Region);
+
+ /// Linearize the CFG within \p Region.
+ void linearizeRegionRec(VPRegionBlock *Region);
+
+public:
+ VPlanPredicator(VPlan &Plan);
+
+ /// Predicate Plan's HCFG.
+ void predicate(void);
+};
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp
index ad3a85a6f760..e5ab24e52df6 100644
--- a/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -1,9 +1,8 @@
//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// This file implements SLP analysis based on VPlan. The analysis is based on
diff --git a/lib/Transforms/Vectorize/VPlanValue.h b/lib/Transforms/Vectorize/VPlanValue.h
index b473579b699f..7b6c228c229e 100644
--- a/lib/Transforms/Vectorize/VPlanValue.h
+++ b/lib/Transforms/Vectorize/VPlanValue.h
@@ -1,9 +1,8 @@
//===- VPlanValue.h - Represent Values in Vectorizer Plan -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.cpp b/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 054bed4e177f..394b1b93113b 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -1,9 +1,8 @@
//===-- VPlanVerifier.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.h b/lib/Transforms/Vectorize/VPlanVerifier.h
index d2f99d006a66..7d2b26252172 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -1,9 +1,8 @@
//===-- VPlanVerifier.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 559ab1968844..6a4f9169c2af 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,9 +1,8 @@
//===-- Vectorize.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//