Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm-c/Core.h | 1
-rw-r--r--  llvm/include/llvm-c/DebugInfo.h | 2
-rw-r--r--  llvm/include/llvm/ADT/APFixedPoint.h | 4
-rw-r--r--  llvm/include/llvm/ADT/APFloat.h | 1
-rw-r--r--  llvm/include/llvm/ADT/APSInt.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Any.h | 11
-rw-r--r--  llvm/include/llvm/ADT/BitVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/BreadthFirstIterator.h | 15
-rw-r--r--  llvm/include/llvm/ADT/CachedHashString.h | 19
-rw-r--r--  llvm/include/llvm/ADT/CoalescingBitVector.h | 3
-rw-r--r--  llvm/include/llvm/ADT/DenseMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DenseMapInfo.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DenseSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DepthFirstIterator.h | 46
-rw-r--r--  llvm/include/llvm/ADT/DirectedGraph.h | 9
-rw-r--r--  llvm/include/llvm/ADT/EnumeratedArray.h | 8
-rw-r--r--  llvm/include/llvm/ADT/EpochTracker.h | 11
-rw-r--r--  llvm/include/llvm/ADT/EquivalenceClasses.h | 9
-rw-r--r--  llvm/include/llvm/ADT/FloatingPointMode.h | 7
-rw-r--r--  llvm/include/llvm/ADT/FoldingSet.h | 11
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleImpl.h | 27
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleInfo.h | 4
-rw-r--r--  llvm/include/llvm/ADT/GraphTraits.h | 16
-rw-r--r--  llvm/include/llvm/ADT/ImmutableList.h | 7
-rw-r--r--  llvm/include/llvm/ADT/ImmutableMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/ImmutableSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/IndexedMap.h | 19
-rw-r--r--  llvm/include/llvm/ADT/IntEqClasses.h | 21
-rw-r--r--  llvm/include/llvm/ADT/IntervalMap.h | 49
-rw-r--r--  llvm/include/llvm/ADT/IntrusiveRefCntPtr.h | 95
-rw-r--r--  llvm/include/llvm/ADT/MapVector.h | 13
-rw-r--r--  llvm/include/llvm/ADT/None.h | 11
-rw-r--r--  llvm/include/llvm/ADT/Optional.h | 13
-rw-r--r--  llvm/include/llvm/ADT/PackedVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/PointerIntPair.h | 7
-rw-r--r--  llvm/include/llvm/ADT/PointerUnion.h | 9
-rw-r--r--  llvm/include/llvm/ADT/PostOrderIterator.h | 11
-rw-r--r--  llvm/include/llvm/ADT/PriorityQueue.h | 7
-rw-r--r--  llvm/include/llvm/ADT/STLArrayExtras.h | 35
-rw-r--r--  llvm/include/llvm/ADT/STLExtras.h | 28
-rw-r--r--  llvm/include/llvm/ADT/STLForwardCompat.h | 13
-rw-r--r--  llvm/include/llvm/ADT/ScopeExit.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SetOperations.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SetVector.h | 19
-rw-r--r--  llvm/include/llvm/ADT/SmallBitVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallPtrSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallString.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallVector.h | 39
-rw-r--r--  llvm/include/llvm/ADT/SparseBitVector.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SparseMultiSet.h | 21
-rw-r--r--  llvm/include/llvm/ADT/SparseSet.h | 19
-rw-r--r--  llvm/include/llvm/ADT/Statistic.h | 31
-rw-r--r--  llvm/include/llvm/ADT/StringExtras.h | 12
-rw-r--r--  llvm/include/llvm/ADT/StringMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/StringMapEntry.h | 11
-rw-r--r--  llvm/include/llvm/ADT/StringSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/StringSwitch.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Triple.h | 35
-rw-r--r--  llvm/include/llvm/ADT/TypeSwitch.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Waymarking.h | 322
-rw-r--r--  llvm/include/llvm/ADT/bit.h | 7
-rw-r--r--  llvm/include/llvm/ADT/edit_distance.h | 11
-rw-r--r--  llvm/include/llvm/ADT/ilist.h | 27
-rw-r--r--  llvm/include/llvm/ADT/ilist_node.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/CycleAnalysis.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/DDG.h | 24
-rw-r--r--  llvm/include/llvm/Analysis/DependenceAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/DependenceGraphBuilder.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/IRSimilarityIdentifier.h | 45
-rw-r--r--  llvm/include/llvm/Analysis/IndirectCallVisitor.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/InlineOrder.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LazyCallGraph.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LazyValueInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/Loads.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfo.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/MLInlineAdvisor.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/MustExecute.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/ObjCARCUtil.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/SparsePropagation.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.h | 11
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 3
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 3
-rw-r--r--  llvm/include/llvm/BinaryFormat/MsgPackDocument.h | 4
-rw-r--r--  llvm/include/llvm/BinaryFormat/Swift.def | 26
-rw-r--r--  llvm/include/llvm/BinaryFormat/Swift.h | 24
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeWriter.h | 6
-rw-r--r--  llvm/include/llvm/Bitstream/BitstreamReader.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/DIE.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/FastISel.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 37
-rw-r--r--  llvm/include/llvm/CodeGen/IntrinsicLowering.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/LoopTraversal.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MIRFormatter.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MIRYamlMapping.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFrameInfo.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOperand.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOutliner.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/ReplaceWithVeclib.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGISel.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/SlotIndexes.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/SwitchLoweringUtils.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/TargetCallingConv.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/VirtRegMap.h | 11
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h | 4
-rw-r--r--  llvm/include/llvm/Demangle/ItaniumDemangle.h | 12
-rw-r--r--  llvm/include/llvm/Demangle/README.txt | 71
-rw-r--r--  llvm/include/llvm/Demangle/StringView.h | 9
-rw-r--r--  llvm/include/llvm/Demangle/Utility.h | 11
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Core.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h | 2
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 3
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 62
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 7
-rw-r--r--  llvm/include/llvm/IR/AbstractCallSite.h | 4
-rw-r--r--  llvm/include/llvm/IR/Attributes.h | 1
-rw-r--r--  llvm/include/llvm/IR/CFG.h | 2
-rw-r--r--  llvm/include/llvm/IR/DIBuilder.h | 1
-rw-r--r--  llvm/include/llvm/IR/DebugInfoMetadata.h | 1
-rw-r--r--  llvm/include/llvm/IR/DiagnosticInfo.h | 16
-rw-r--r--  llvm/include/llvm/IR/Dominators.h | 13
-rw-r--r--  llvm/include/llvm/IR/IRBuilder.h | 5
-rw-r--r--  llvm/include/llvm/IR/IRPrintingPasses.h | 5
-rw-r--r--  llvm/include/llvm/IR/InstrTypes.h | 9
-rw-r--r--  llvm/include/llvm/IR/Instruction.h | 2
-rw-r--r--  llvm/include/llvm/IR/Instructions.h | 3
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h | 11
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td | 4
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 8
-rw-r--r--  llvm/include/llvm/IR/LLVMContext.h | 1
-rw-r--r--  llvm/include/llvm/IR/LLVMRemarkStreamer.h | 12
-rw-r--r--  llvm/include/llvm/IR/LegacyPassManager.h | 2
-rw-r--r--  llvm/include/llvm/IR/MDBuilder.h | 2
-rw-r--r--  llvm/include/llvm/IR/Metadata.h | 23
-rw-r--r--  llvm/include/llvm/IR/ModuleSummaryIndex.h | 1
-rw-r--r--  llvm/include/llvm/IR/PassInstrumentation.h | 2
-rw-r--r--  llvm/include/llvm/IR/PassManager.h | 5
-rw-r--r--  llvm/include/llvm/IR/PassManagerImpl.h | 2
-rw-r--r--  llvm/include/llvm/IR/PassTimingInfo.h | 2
-rw-r--r--  llvm/include/llvm/IR/ReplaceConstant.h | 7
-rw-r--r--  llvm/include/llvm/IR/SSAContext.h | 5
-rw-r--r--  llvm/include/llvm/IR/SafepointIRVerifier.h | 2
-rw-r--r--  llvm/include/llvm/IR/Statepoint.h | 20
-rw-r--r--  llvm/include/llvm/IR/Type.h | 2
-rw-r--r--  llvm/include/llvm/IR/Use.h | 1
-rw-r--r--  llvm/include/llvm/InterfaceStub/IFSStub.h | 4
-rw-r--r--  llvm/include/llvm/LineEditor/LineEditor.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCContext.h | 10
-rw-r--r--  llvm/include/llvm/MC/MCObjectFileInfo.h | 14
-rw-r--r--  llvm/include/llvm/MC/MCPseudoProbe.h | 2
-rw-r--r--  llvm/include/llvm/MCA/CustomBehaviour.h | 2
-rw-r--r--  llvm/include/llvm/MCA/HWEventListener.h | 2
-rw-r--r--  llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h | 2
-rw-r--r--  llvm/include/llvm/Object/Archive.h | 2
-rw-r--r--  llvm/include/llvm/Object/ELFTypes.h | 2
-rw-r--r--  llvm/include/llvm/Object/MachO.h | 4
-rw-r--r--  llvm/include/llvm/Object/ObjectFile.h | 6
-rw-r--r--  llvm/include/llvm/Passes/StandardInstrumentations.h | 2
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 19
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfCorrelator.h | 2
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfData.inc | 4
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfReader.h | 79
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfWriter.h | 51
-rw-r--r--  llvm/include/llvm/ProfileData/MemProfData.inc | 99
-rw-r--r--  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h | 3
-rw-r--r--  llvm/include/llvm/Remarks/RemarkLinker.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkParser.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkSerializer.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkStreamer.h | 4
-rw-r--r--  llvm/include/llvm/Support/AArch64TargetParser.def | 3
-rw-r--r--  llvm/include/llvm/Support/AMDGPUMetadata.h | 5
-rw-r--r--  llvm/include/llvm/Support/ARMTargetParser.def | 2
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamReader.h | 11
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamWriter.h | 11
-rw-r--r--  llvm/include/llvm/Support/CommandLine.h | 2
-rw-r--r--  llvm/include/llvm/Support/Compiler.h | 21
-rw-r--r--  llvm/include/llvm/Support/FileOutputBuffer.h | 2
-rw-r--r--  llvm/include/llvm/Support/FormatVariadicDetails.h | 2
-rw-r--r--  llvm/include/llvm/Support/GenericDomTree.h | 2
-rw-r--r--  llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h | 2
-rw-r--r--  llvm/include/llvm/Support/KnownBits.h | 2
-rw-r--r--  llvm/include/llvm/Support/RISCVISAInfo.h | 3
-rw-r--r--  llvm/include/llvm/Support/ScopedPrinter.h | 8
-rw-r--r--  llvm/include/llvm/Support/SuffixTree.h | 2
-rw-r--r--  llvm/include/llvm/Support/Timer.h | 2
-rw-r--r--  llvm/include/llvm/TableGen/Record.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/AlwaysInliner.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h | 168
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/IROutliner.h | 12
-rw-r--r--  llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfile.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/InstCombine/InstCombiner.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h | 15
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Scalarizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Debugify.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LoopPeel.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h | 1
-rw-r--r--  llvm/include/llvm/module.modulemap | 1
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 11
-rw-r--r--  llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 31
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 6
-rw-r--r--  llvm/lib/Analysis/Loads.cpp | 4
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 4
-rw-r--r--  llvm/lib/Analysis/MemDerefPrinter.cpp | 8
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 41
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 33
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 4
-rw-r--r--  llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 13
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 10
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 25
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/EarlyIfConversion.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/ExpandMemCmp.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/IfConversion.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 351
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 41
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 135
-rw-r--r--  llvm/lib/CodeGen/MachineModuleInfo.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/MachineModuleSlotTracker.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineRegisterInfo.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/PostRASchedulerList.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 181
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 6
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 183
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 76
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 30
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r--  llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20
-rw-r--r--  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 110
-rw-r--r--  llvm/lib/IR/Attributes.cpp | 50
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 1
-rw-r--r--  llvm/lib/IR/BasicBlock.cpp | 3
-rw-r--r--  llvm/lib/IR/Comdat.cpp | 4
-rw-r--r--  llvm/lib/IR/ConstantFold.cpp | 2
-rw-r--r--  llvm/lib/IR/Constants.cpp | 7
-rw-r--r--  llvm/lib/IR/Core.cpp | 2
-rw-r--r--  llvm/lib/IR/DIBuilder.cpp | 2
-rw-r--r--  llvm/lib/IR/DataLayout.cpp | 3
-rw-r--r--  llvm/lib/IR/DebugInfo.cpp | 5
-rw-r--r--  llvm/lib/IR/DebugInfoMetadata.cpp | 4
-rw-r--r--  llvm/lib/IR/DebugLoc.cpp | 1
-rw-r--r--  llvm/lib/IR/DiagnosticInfo.cpp | 6
-rw-r--r--  llvm/lib/IR/Dominators.cpp | 18
-rw-r--r--  llvm/lib/IR/Function.cpp | 2
-rw-r--r--  llvm/lib/IR/Globals.cpp | 2
-rw-r--r--  llvm/lib/IR/IRBuilder.cpp | 1
-rw-r--r--  llvm/lib/IR/InlineAsm.cpp | 1
-rw-r--r--  llvm/lib/IR/Instruction.cpp | 1
-rw-r--r--  llvm/lib/IR/IntrinsicInst.cpp | 2
-rw-r--r--  llvm/lib/IR/LLVMContext.cpp | 2
-rw-r--r--  llvm/lib/IR/LLVMContextImpl.cpp | 12
-rw-r--r--  llvm/lib/IR/LLVMContextImpl.h | 24
-rw-r--r--  llvm/lib/IR/LLVMRemarkStreamer.cpp | 2
-rw-r--r--  llvm/lib/IR/LegacyPassManager.cpp | 11
-rw-r--r--  llvm/lib/IR/Metadata.cpp | 4
-rw-r--r--  llvm/lib/IR/Module.cpp | 2
-rw-r--r--  llvm/lib/IR/ModuleSummaryIndex.cpp | 1
-rw-r--r--  llvm/lib/IR/Operator.cpp | 1
-rw-r--r--  llvm/lib/IR/OptBisect.cpp | 1
-rw-r--r--  llvm/lib/IR/PassManager.cpp | 5
-rw-r--r--  llvm/lib/IR/ProfileSummary.cpp | 3
-rw-r--r--  llvm/lib/IR/PseudoProbe.cpp | 2
-rw-r--r--  llvm/lib/IR/ReplaceConstant.cpp | 3
-rw-r--r--  llvm/lib/IR/SSAContext.cpp | 3
-rw-r--r--  llvm/lib/IR/SafepointIRVerifier.cpp | 4
-rw-r--r--  llvm/lib/IR/Statepoint.cpp | 2
-rw-r--r--  llvm/lib/IR/Type.cpp | 2
-rw-r--r--  llvm/lib/IR/Use.cpp | 6
-rw-r--r--  llvm/lib/IR/Value.cpp | 3
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 12
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 3
-rw-r--r--  llvm/lib/LTO/LTOBackend.cpp | 7
-rw-r--r--  llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 2
-rw-r--r--  llvm/lib/MC/MCAsmStreamer.cpp | 2
-rw-r--r--  llvm/lib/MC/MCContext.cpp | 8
-rw-r--r--  llvm/lib/MC/MCObjectFileInfo.cpp | 12
-rw-r--r--  llvm/lib/Object/MachOObjectFile.cpp | 13
-rw-r--r--  llvm/lib/ObjectYAML/ELFYAML.cpp | 1
-rw-r--r--  llvm/lib/ObjectYAML/WasmEmitter.cpp | 13
-rw-r--r--  llvm/lib/Passes/PassBuilderPipelines.cpp | 7
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp | 26
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 62
-rw-r--r--  llvm/lib/ProfileData/InstrProfWriter.cpp | 31
-rw-r--r--  llvm/lib/Remarks/BitstreamRemarkParser.cpp | 1
-rw-r--r--  llvm/lib/Remarks/BitstreamRemarkParser.h | 4
-rw-r--r--  llvm/lib/Remarks/RemarkLinker.cpp | 4
-rw-r--r--  llvm/lib/Remarks/RemarkParser.cpp | 1
-rw-r--r--  llvm/lib/Remarks/YAMLRemarkParser.h | 2
-rw-r--r--  llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 1
-rw-r--r--  llvm/lib/Support/ARMAttributeParser.cpp | 2
-rw-r--r--  llvm/lib/Support/Host.cpp | 1
-rw-r--r--  llvm/lib/Support/RISCVISAInfo.cpp | 61
-rw-r--r--  llvm/lib/Support/Signals.cpp | 2
-rw-r--r--  llvm/lib/Support/Triple.cpp | 4
-rw-r--r--  llvm/lib/Support/Valgrind.cpp | 2
-rw-r--r--  llvm/lib/Support/Windows/Host.inc | 3
-rw-r--r--  llvm/lib/Support/raw_ostream.cpp | 2
-rw-r--r--  llvm/lib/TableGen/DetailedRecordsBackend.cpp | 11
-rw-r--r--  llvm/lib/TableGen/JSONBackend.cpp | 8
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 2
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 8
-rw-r--r--  llvm/lib/TableGen/SetTheory.cpp | 3
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp | 3
-rw-r--r--  llvm/lib/TableGen/TableGenBackendSkeleton.cpp | 19
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 46
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 27
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 145
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 31
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 84
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 97
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h | 19
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 21
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 95
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 55
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 35
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 88
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 41
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5
-rw-r--r--  llvm/lib/Target/ARM/ARM.td | 10
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 39
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 19
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb.td | 9
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb2.td | 16
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 8
-rw-r--r--  llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp | 35
-rw-r--r--  llvm/lib/Target/M68k/M68kInstrBits.td | 8
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 45
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 11
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.td | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 175
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 21
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 98
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 503
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZk.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 27
-rw-r--r--  llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp | 4
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 82
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.h | 5
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp | 38
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h | 43
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h | 38
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 16
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 1
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp | 41
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp | 34
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp | 18
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 230
-rw-r--r--  llvm/lib/Target/X86/X86LowerAMXType.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 59
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp | 244
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 560
-rw-r--r--  llvm/lib/Transforms/IPO/CalledValuePropagation.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp | 27
-rw-r--r--  llvm/lib/Transforms/IPO/IROutliner.cpp | 7
-rw-r--r--  llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 233
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 52
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 285
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 15
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 26
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 123
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 48
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 82
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARC.cpp | 13
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARC.h | 6
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 25
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 28
-rw-r--r--  llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 147
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp | 53
-rw-r--r--  llvm/lib/Transforms/Utils/CodeExtractor.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/GlobalStatus.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/LoopPeel.cpp | 60
-rw-r--r--  llvm/lib/Transforms/Utils/ModuleUtils.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Utils/Utils.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/VNCoercion.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 161
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 198
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 89
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Vectorize/Vectorize.cpp | 1
-rw-r--r--  llvm/tools/llvm-ar/llvm-ar.cpp | 16
-rw-r--r--  llvm/tools/llvm-as/llvm-as.cpp | 2
-rw-r--r--  llvm/tools/llvm-extract/llvm-extract.cpp | 1
-rw-r--r--  llvm/tools/llvm-lto/llvm-lto.cpp | 6
-rw-r--r--  llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 1
-rw-r--r--  llvm/tools/llvm-profdata/llvm-profdata.cpp | 22
-rw-r--r--  llvm/tools/llvm-readobj/ELFDumper.cpp | 1
-rw-r--r--  llvm/tools/llvm-readobj/WasmDumper.cpp | 5
-rw-r--r--  llvm/tools/llvm-stress/llvm-stress.cpp | 3
-rw-r--r--  llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 40
-rw-r--r--  llvm/utils/TableGen/CodeGenSchedule.cpp | 13
-rw-r--r--  llvm/utils/TableGen/CompressInstEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GICombinerEmitter.cpp | 12
-rw-r--r--  llvm/utils/TableGen/GlobalISel/CodeExpander.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/CodeExpander.h | 2
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDag.h | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h | 5
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp | 2
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h | 6
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h | 6
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISelEmitter.cpp | 2
-rw-r--r--  llvm/utils/TableGen/InfoByHwMode.cpp | 1
-rw-r--r--  llvm/utils/TableGen/InfoByHwMode.h | 2
-rw-r--r--  llvm/utils/TableGen/IntrinsicEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/OptParserEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/OptRSTEmitter.cpp | 7
-rw-r--r--  llvm/utils/TableGen/PredicateExpander.h | 4
-rw-r--r--  llvm/utils/TableGen/RegisterBankEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/SearchableTableEmitter.cpp | 5
-rw-r--r--  llvm/utils/TableGen/TableGen.cpp | 3
-rw-r--r--  llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp | 3
-rw-r--r--  llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h | 5
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerTables.cpp | 5
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerTables.h | 7
-rw-r--r--  llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 2
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.cpp | 2
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.h | 11
543 files changed, 6708 insertions, 3800 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index ca3ca24487a5..09d80841fa5d 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -18,6 +18,7 @@
#include "llvm-c/Deprecated.h"
#include "llvm-c/ErrorHandling.h"
#include "llvm-c/ExternC.h"
+
#include "llvm-c/Types.h"
LLVM_C_EXTERN_C_BEGIN
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index a515533f38e2..8554a0199873 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -16,8 +16,8 @@
#ifndef LLVM_C_DEBUGINFO_H
#define LLVM_C_DEBUGINFO_H
-#include "llvm-c/Core.h"
#include "llvm-c/ExternC.h"
+#include "llvm-c/Types.h"
LLVM_C_EXTERN_C_BEGIN
diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h
index d6349e6b2a88..92cabdd9f9e4 100644
--- a/llvm/include/llvm/ADT/APFixedPoint.h
+++ b/llvm/include/llvm/ADT/APFixedPoint.h
@@ -5,12 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+///
/// \file
/// Defines the fixed point number interface.
/// This is a class for abstracting various operations performed on fixed point
/// types.
-//
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_APFIXEDPOINT_H
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 40e0e32c77a8..17b57de7b0aa 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// \brief
/// This file declares a class to represent arbitrary precision floating point
/// values and provide a variety of arithmetic operations on them.
///
diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h
index c1cf3c546070..7b6af436f577 100644
--- a/llvm/include/llvm/ADT/APSInt.h
+++ b/llvm/include/llvm/ADT/APSInt.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the APSInt class, which is a simple class that
-// represents an arbitrary sized integer that knows its signedness.
-//
+///
+/// \file
+/// This file implements the APSInt class, which is a simple class that
+/// represents an arbitrary sized integer that knows its signedness.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_APSINT_H
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index 1b4f2c2fa985..1c7ba0371781 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides Any, a non-template class modeled in the spirit of
-// std::any. The idea is to provide a type-safe replacement for C's void*.
-// It can hold a value of any copy-constructible copy-assignable type
-//
+///
+/// \file
+/// This file provides Any, a non-template class modeled in the spirit of
+/// std::any. The idea is to provide a type-safe replacement for C's void*.
+/// It can hold a value of any copy-constructible copy-assignable type
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ANY_H
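As context for the header comment rewritten above, a minimal usage sketch of llvm::Any; the function name `anyDemo` is illustrative and not part of this patch:

```
#include "llvm/ADT/Any.h"
#include <cassert>
#include <string>

void anyDemo() {
  llvm::Any A = 7; // holds an int by value
  assert(llvm::any_isa<int>(A)); // type-safe query, unlike a raw void*
  int N = llvm::any_cast<int>(A);
  (void)N;
  A = std::string("hello"); // rebinds to another copy-constructible type
}
```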
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index fff4a8f578d2..9540b3985963 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the BitVector class.
-//
+///
+/// \file
+/// This file implements the BitVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BITVECTOR_H
diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h
index 7d728a23b19a..1312b5f91e83 100644
--- a/llvm/include/llvm/ADT/BreadthFirstIterator.h
+++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h
@@ -5,13 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build a generic breadth
-// first graph iterator. This file exposes the following functions/types:
-//
-// bf_begin/bf_end/bf_iterator
-// * Normal breadth-first iteration - visit a graph level-by-level.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to build a generic breadth
+/// first graph iterator. This file exposes the following functions/types:
+///
+/// bf_begin/bf_end/bf_iterator
+/// * Normal breadth-first iteration - visit a graph level-by-level.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BREADTHFIRSTITERATOR_H
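To make the bf_begin/bf_end interface described above concrete, a small sketch over the IR CFG (the helper name `visitLevelOrder` is illustrative):

```
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/IR/CFG.h" // GraphTraits specializations for the IR CFG
#include "llvm/IR/Function.h"

void visitLevelOrder(llvm::Function &F) {
  // breadth_first() wraps bf_begin/bf_end; basic blocks are visited
  // level-by-level starting from the entry block.
  for (llvm::BasicBlock *BB : llvm::breadth_first(&F))
    (void)BB; // process BB
}
```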
diff --git a/llvm/include/llvm/ADT/CachedHashString.h b/llvm/include/llvm/ADT/CachedHashString.h
index 785bd07b3a44..ebd40e320715 100644
--- a/llvm/include/llvm/ADT/CachedHashString.h
+++ b/llvm/include/llvm/ADT/CachedHashString.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines CachedHashString and CachedHashStringRef. These are owning
-// and not-owning string types that store their hash in addition to their string
-// data.
-//
-// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
-// (because, unlike std::string, CachedHashString lets us have empty and
-// tombstone values).
-//
+///
+/// \file
+/// This file defines CachedHashString and CachedHashStringRef. These are
+/// owning and not-owning string types that store their hash in addition to
+/// their string data.
+///
+/// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
+/// (because, unlike std::string, CachedHashString lets us have empty and
+/// tombstone values).
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_CACHEDHASHSTRING_H
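A minimal sketch of the DenseSet use case the comment above describes; the `insertOnce` helper is illustrative, not part of the patch:

```
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"

bool insertOnce(llvm::DenseSet<llvm::CachedHashStringRef> &Seen,
                llvm::StringRef S) {
  // The hash is computed once here and reused on every later probe;
  // DenseSet works because the type reserves empty/tombstone encodings.
  return Seen.insert(llvm::CachedHashStringRef(S)).second;
}
```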
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 6935c255a099..4940bc1c2c18 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file A bitvector that uses an IntervalMap to coalesce adjacent elements
+/// \file
+/// A bitvector that uses an IntervalMap to coalesce adjacent elements
/// into intervals.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 595eabd0ffb4..7673b66ca42a 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DenseMap class.
-//
+///
+/// \file
+/// This file defines the DenseMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSEMAP_H
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index 75b7371a3683..afd478f0b849 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines DenseMapInfo traits for DenseMap.
-//
+///
+/// \file
+/// This file defines DenseMapInfo traits for DenseMap.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSEMAPINFO_H
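For readers unfamiliar with these traits, a sketch of a user-side DenseMapInfo specialization; the `Point` type and the choice of reserved keys are illustrative assumptions, not part of this patch:

```
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include <climits>

struct Point { int X, Y; };

namespace llvm {
template <> struct DenseMapInfo<Point> {
  // Two reserved keys that must never collide with real data.
  static Point getEmptyKey() { return {INT_MIN, 0}; }
  static Point getTombstoneKey() { return {INT_MIN, 1}; }
  static unsigned getHashValue(const Point &P) {
    return static_cast<unsigned>(hash_combine(P.X, P.Y));
  }
  static bool isEqual(const Point &L, const Point &R) {
    return L.X == R.X && L.Y == R.Y;
  }
};
} // namespace llvm

// Usage: llvm::DenseMap<Point, int> M; M[{3, 4}] = 5;
```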
diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h
index e767211a0900..b89c88626e43 100644
--- a/llvm/include/llvm/ADT/DenseSet.h
+++ b/llvm/include/llvm/ADT/DenseSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DenseSet and SmallDenseSet classes.
-//
+///
+/// \file
+/// This file defines the DenseSet and SmallDenseSet classes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSESET_H
diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index 42ac61d7cf52..cea6fbcd9d29 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -5,28 +5,30 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build generic depth
-// first graph iterator. This file exposes the following functions/types:
-//
-// df_begin/df_end/df_iterator
-// * Normal depth-first iteration - visit a node and then all of its children.
-//
-// idf_begin/idf_end/idf_iterator
-// * Depth-first iteration on the 'inverse' graph.
-//
-// df_ext_begin/df_ext_end/df_ext_iterator
-// * Normal depth-first iteration - visit a node and then all of its children.
-// This iterator stores the 'visited' set in an external set, which allows
-// it to be more efficient, and allows external clients to use the set for
-// other purposes.
-//
-// idf_ext_begin/idf_ext_end/idf_ext_iterator
-// * Depth-first iteration on the 'inverse' graph.
-// This iterator stores the 'visited' set in an external set, which allows
-// it to be more efficient, and allows external clients to use the set for
-// other purposes.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to build generic depth
+/// first graph iterator. This file exposes the following functions/types:
+///
+/// df_begin/df_end/df_iterator
+/// * Normal depth-first iteration - visit a node and then all of its
+/// children.
+///
+/// idf_begin/idf_end/idf_iterator
+/// * Depth-first iteration on the 'inverse' graph.
+///
+/// df_ext_begin/df_ext_end/df_ext_iterator
+/// * Normal depth-first iteration - visit a node and then all of its
+/// children. This iterator stores the 'visited' set in an external set,
+/// which allows it to be more efficient, and allows external clients to
+/// use the set for other purposes.
+///
+/// idf_ext_begin/idf_ext_end/idf_ext_iterator
+/// * Depth-first iteration on the 'inverse' graph.
+/// This iterator stores the 'visited' set in an external set, which
+/// allows it to be more efficient, and allows external clients to use
+/// the set for other purposes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DEPTHFIRSTITERATOR_H
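A short sketch of the iterators enumerated above, again over the IR CFG (the function name `visitPreorder` is illustrative):

```
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

void visitPreorder(llvm::Function &F) {
  // depth_first() wraps df_begin/df_end; inverse_depth_first() wraps idf_*.
  for (llvm::BasicBlock *BB : llvm::depth_first(&F))
    (void)BB; // BB is visited before the subtrees of its successors

  // The _ext variants take the visited set by reference, so it can be
  // inspected afterwards or shared across several traversals.
  llvm::df_iterator_default_set<llvm::BasicBlock *> Visited;
  for (llvm::BasicBlock *BB : llvm::depth_first_ext(&F, Visited))
    (void)BB;
}
```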
diff --git a/llvm/include/llvm/ADT/DirectedGraph.h b/llvm/include/llvm/ADT/DirectedGraph.h
index e8bb9e6b2292..83c0bea6393c 100644
--- a/llvm/include/llvm/ADT/DirectedGraph.h
+++ b/llvm/include/llvm/ADT/DirectedGraph.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the interface and a base class implementation for a
-// directed graph.
-//
+///
+/// \file
+/// This file defines the interface and a base class implementation for a
+/// directed graph.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DIRECTEDGRAPH_H
diff --git a/llvm/include/llvm/ADT/EnumeratedArray.h b/llvm/include/llvm/ADT/EnumeratedArray.h
index a66ec9d08c37..f54a50446c6e 100644
--- a/llvm/include/llvm/ADT/EnumeratedArray.h
+++ b/llvm/include/llvm/ADT/EnumeratedArray.h
@@ -5,9 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines an array type that can be indexed using scoped enum values.
-//
+///
+/// \file
+/// This file defines an array type that can be indexed using scoped enum
+/// values.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ENUMERATEDARRAY_H
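A minimal sketch of indexing by a scoped enum, assuming an enumeration with a `Last` member as the header's default template argument expects; `Channel` and `enumArrayDemo` are illustrative names:

```
#include "llvm/ADT/EnumeratedArray.h"

enum class Channel { R, G, B, Last = B };

void enumArrayDemo() {
  // Size is derived from Channel::Last; indexing takes the enum directly,
  // avoiding static_casts at every call site.
  llvm::EnumeratedArray<float, Channel> Gain;
  Gain[Channel::G] = 2.0f;
}
```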
diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h
index 7a2e4220afec..b06888494466 100644
--- a/llvm/include/llvm/ADT/EpochTracker.h
+++ b/llvm/include/llvm/ADT/EpochTracker.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes.
-// These can be used to write iterators that are fail-fast when LLVM is built
-// with asserts enabled.
-//
+///
+/// \file
+/// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes.
+/// These can be used to write iterators that are fail-fast when LLVM is built
+/// with asserts enabled.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EPOCHTRACKER_H
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index de6bb3bca7e3..f12b683ead2d 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Generic implementation of equivalence classes through the use Tarjan's
-// efficient union-find algorithm.
-//
+///
+/// \file
+/// Generic implementation of equivalence classes through the use Tarjan's
+/// efficient union-find algorithm.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EQUIVALENCECLASSES_H
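A minimal sketch of the union-find workflow the comment above describes (`ecDemo` is an illustrative name):

```
#include "llvm/ADT/EquivalenceClasses.h"

void ecDemo() {
  llvm::EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // inserts 1 and 2 if needed, then merges their classes
  EC.unionSets(2, 3); // {1, 2, 3} now share a single leader
  bool Same = EC.isEquivalent(1, 3); // true
  (void)Same;
}
```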
diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h
index 62c127a49620..9cc69b8a8344 100644
--- a/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Utilities for dealing with flags related to floating point mode controls.
-//
+///
+/// \file
+/// Utilities for dealing with flags related to floating point mode controls.
+///
//===----------------------------------------------------------------------===/
#ifndef LLVM_ADT_FLOATINGPOINTMODE_H
diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h
index fb1cb03a4b5c..a8707f0ee81e 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a hash set that can be used to remove duplication of nodes
-// in a graph. This code was originally created by Chris Lattner for use with
-// SelectionDAGCSEMap, but was isolated to provide use across the llvm code set.
-//
+///
+/// \file
+/// This file defines a hash set that can be used to remove duplication of nodes
+/// in a graph. This code was originally created by Chris Lattner for use with
+/// SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+/// set.
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_FOLDINGSET_H
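To make the uniquing idea above concrete, a sketch of a foldable node; the `ConstNode` type and `foldDemo` are illustrative, not from the patch:

```
#include "llvm/ADT/FoldingSet.h"

// A node type becomes uniquable by inheriting from FoldingSetNode and
// describing its identity in Profile().
class ConstNode : public llvm::FoldingSetNode {
  int Value;
public:
  explicit ConstNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

void foldDemo() {
  llvm::FoldingSet<ConstNode> Set;
  ConstNode A(42), B(42);
  Set.GetOrInsertNode(&A);
  // B profiles identically to A, so the already-inserted node is returned.
  ConstNode *Canonical = Set.GetOrInsertNode(&B);
  (void)Canonical; // == &A
}
```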
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index 5f29236eac47..d443f9e21a47 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -5,18 +5,19 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This template implementation resides in a separate file so that it
-// does not get injected into every .cpp file that includes the
-// generic header.
-//
-// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO.
-//
-// This file should only be included by files that implement a
-// specialization of the relevant templates. Currently these are:
-// - CycleAnalysis.cpp
-// - MachineCycleAnalysis.cpp
-//
+///
+/// \file
+/// This template implementation resides in a separate file so that it
+/// does not get injected into every .cpp file that includes the
+/// generic header.
+///
+/// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO.
+///
+/// This file should only be included by files that implement a
+/// specialization of the relevant templates. Currently these are:
+/// - CycleAnalysis.cpp
+/// - MachineCycleAnalysis.cpp
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICCYCLEIMPL_H
@@ -77,7 +78,7 @@ template <typename ContextT> class GenericCycleInfoCompute {
unsigned Start = 0; // DFS start; positive if block is found
unsigned End = 0; // DFS end
- DFSInfo() {}
+ DFSInfo() = default;
explicit DFSInfo(unsigned Start) : Start(Start) {}
/// Whether this node is an ancestor (or equal to) the node \p Other
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index 7768253e121d..d5f9cd9142ac 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+///
/// \file
/// \brief Find all cycles in a control-flow graph, including irreducible loops.
///
@@ -22,7 +22,7 @@
/// unique cycle C which is a superset of L.
/// - In the absence of irreducible control flow, the cycles are
/// exactly the natural loops in the program.
-//
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICCYCLEINFO_H
diff --git a/llvm/include/llvm/ADT/GraphTraits.h b/llvm/include/llvm/ADT/GraphTraits.h
index 3ce91225d80d..3a7773592af3 100644
--- a/llvm/include/llvm/ADT/GraphTraits.h
+++ b/llvm/include/llvm/ADT/GraphTraits.h
@@ -5,13 +5,15 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the little GraphTraits<X> template class that should be
-// specialized by classes that want to be iteratable by generic graph iterators.
-//
-// This file also defines the marker class Inverse that is used to iterate over
-// graphs in a graph defined, inverse ordering...
-//
+///
+/// \file
+/// This file defines the little GraphTraits<X> template class that should be
+/// specialized by classes that want to be iteratable by generic graph
+/// iterators.
+///
+/// This file also defines the marker class Inverse that is used to iterate over
+/// graphs in a graph defined, inverse ordering...
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GRAPHTRAITS_H
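A sketch of the specialization the comment above asks graph owners to provide; `MyNode` is an illustrative type. Once this exists, the generic df_/bf_/po_ iterators work on the graph unchanged:

```
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"

struct MyNode {
  llvm::SmallVector<MyNode *, 4> Succs;
};

namespace llvm {
template <> struct GraphTraits<MyNode *> {
  using NodeRef = MyNode *;
  using ChildIteratorType = SmallVector<MyNode *, 4>::iterator;
  static NodeRef getEntryNode(MyNode *G) { return G; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // namespace llvm
```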
diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h
index cf27c5a16d28..23f82691825c 100644
--- a/llvm/include/llvm/ADT/ImmutableList.h
+++ b/llvm/include/llvm/ADT/ImmutableList.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImmutableList class.
-//
+///
+/// \file
+/// This file defines the ImmutableList class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLELIST_H
diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h
index f0e898cafaf9..c9351b3213dc 100644
--- a/llvm/include/llvm/ADT/ImmutableMap.h
+++ b/llvm/include/llvm/ADT/ImmutableMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImmutableMap class.
-//
+///
+/// \file
+/// This file defines the ImmutableMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLEMAP_H
diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h
index 8cef5acbafaa..b513fe9ec011 100644
--- a/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/llvm/include/llvm/ADT/ImmutableSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImutAVLTree and ImmutableSet classes.
-//
+///
+/// \file
+/// This file defines the ImutAVLTree and ImmutableSet classes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLESET_H
diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h
index b44f16b91d76..5ac5f798269b 100644
--- a/llvm/include/llvm/ADT/IndexedMap.h
+++ b/llvm/include/llvm/ADT/IndexedMap.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements an indexed map. The index map template takes two
-// types. The first is the mapped type and the second is a functor
-// that maps its argument to a size_t. On instantiation a "null" value
-// can be provided to be used as a "does not exist" indicator in the
-// map. A member function grow() is provided that given the value of
-// the maximally indexed key (the argument of the functor) makes sure
-// the map has enough space for it.
-//
+///
+/// \file
+/// This file implements an indexed map. The index map template takes two
+/// types. The first is the mapped type and the second is a functor
+/// that maps its argument to a size_t. On instantiation a "null" value
+/// can be provided to be used as a "does not exist" indicator in the
+/// map. A member function grow() is provided that given the value of
+/// the maximally indexed key (the argument of the functor) makes sure
+/// the map has enough space for it.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INDEXEDMAP_H
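A minimal sketch of the null-value and grow() behavior described above (`indexedMapDemo` is an illustrative name):

```
#include "llvm/ADT/IndexedMap.h"

void indexedMapDemo() {
  // Keys are unsigned indices (the default identity functor); -1 is the
  // "does not exist" marker supplied at construction.
  llvm::IndexedMap<int> Map(-1);
  Map.grow(100); // make room for the largest key about to be used
  Map[42] = 7;
  bool Missing = (Map[41] == -1); // true: untouched slots hold the null value
  (void)Missing;
}
```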
diff --git a/llvm/include/llvm/ADT/IntEqClasses.h b/llvm/include/llvm/ADT/IntEqClasses.h
index 08f46a3079ef..84bb58cb736c 100644
--- a/llvm/include/llvm/ADT/IntEqClasses.h
+++ b/llvm/include/llvm/ADT/IntEqClasses.h
@@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Equivalence classes for small integers. This is a mapping of the integers
-// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
-//
-// Initially each integer has its own equivalence class. Classes are joined by
-// passing a representative member of each class to join().
-//
-// Once the classes are built, compress() will number them 0 .. M-1 and prevent
-// further changes.
-//
+///
+/// \file
+/// Equivalence classes for small integers. This is a mapping of the integers
+/// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+///
+/// Initially each integer has its own equivalence class. Classes are joined by
+/// passing a representative member of each class to join().
+///
+/// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+/// further changes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INTEQCLASSES_H
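The join/compress lifecycle above in a short sketch (`intEqDemo` is an illustrative name):

```
#include "llvm/ADT/IntEqClasses.h"

void intEqDemo() {
  llvm::IntEqClasses EC(8); // integers 0..7, each in its own class
  EC.join(0, 3);
  EC.join(3, 5); // {0, 3, 5} now form one class
  EC.compress(); // renumber classes 0..M-1; further joins are not allowed
  bool Same = (EC[0] == EC[5]); // true: same class number
  (void)Same;
}
```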
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h
index 3c107a3622a9..368ed46f98d2 100644
--- a/llvm/include/llvm/ADT/IntervalMap.h
+++ b/llvm/include/llvm/ADT/IntervalMap.h
@@ -5,30 +5,31 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a coalescing interval map for small objects.
-//
-// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
-// same value are represented in a compressed form.
-//
-// Iterators provide ordered access to the compressed intervals rather than the
-// individual keys, and insert and erase operations use key intervals as well.
-//
-// Like SmallVector, IntervalMap will store the first N intervals in the map
-// object itself without any allocations. When space is exhausted it switches to
-// a B+-tree representation with very small overhead for small key and value
-// objects.
-//
-// A Traits class specifies how keys are compared. It also allows IntervalMap to
-// work with both closed and half-open intervals.
-//
-// Keys and values are not stored next to each other in a std::pair, so we don't
-// provide such a value_type. Dereferencing iterators only returns the mapped
-// value. The interval bounds are accessible through the start() and stop()
-// iterator methods.
-//
-// IntervalMap is optimized for small key and value objects, 4 or 8 bytes each
-// is the optimal size. For large objects use std::map instead.
+///
+/// \file
+/// This file implements a coalescing interval map for small objects.
+///
+/// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
+/// same value are represented in a compressed form.
+///
+/// Iterators provide ordered access to the compressed intervals rather than the
+/// individual keys, and insert and erase operations use key intervals as well.
+///
+/// Like SmallVector, IntervalMap will store the first N intervals in the map
+/// object itself without any allocations. When space is exhausted it switches
+/// to a B+-tree representation with very small overhead for small key and
+/// value objects.
+///
+/// A Traits class specifies how keys are compared. It also allows IntervalMap
+/// to work with both closed and half-open intervals.
+///
+/// Keys and values are not stored next to each other in a std::pair, so we
+/// don't provide such a value_type. Dereferencing iterators only returns the
+/// mapped value. The interval bounds are accessible through the start() and
+/// stop() iterator methods.
+///
+/// IntervalMap is optimized for small key and value objects, 4 or 8 bytes
+/// each is the optimal size. For large objects use std::map instead.
//
//===----------------------------------------------------------------------===//
//
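A minimal sketch of the insert/iterate interface described above, under the default (closed-interval, fully coalescing) traits; `intervalMapDemo` is an illustrative name:

```
#include "llvm/ADT/IntervalMap.h"

void intervalMapDemo() {
  // The allocator must outlive the map; adjacent equal-valued intervals
  // coalesce automatically.
  llvm::IntervalMap<unsigned, char>::Allocator Alloc;
  llvm::IntervalMap<unsigned, char> Map(Alloc);
  Map.insert(10, 20, 'a'); // [10,20] -> 'a'
  Map.insert(21, 30, 'a'); // coalesces with the above into [10,30] -> 'a'
  for (auto I = Map.begin(), E = Map.end(); I != E; ++I)
    (void)*I; // mapped value; bounds via I.start() and I.stop()
}
```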
diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 9715c9d01b98..975535bb5676 100644
--- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -5,51 +5,56 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and
-// IntrusiveRefCntPtr classes.
-//
-// IntrusiveRefCntPtr is a smart pointer to an object which maintains a
-// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a
-// refcount member variable and methods for updating the refcount. An object
-// that inherits from (ThreadSafe)RefCountedBase deletes itself when its
-// refcount hits zero.
-//
-// For example:
-//
-// class MyClass : public RefCountedBase<MyClass> {};
-//
-// void foo() {
-// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by
-// // 1 (from 0 in this case).
-// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
-//
-// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
-// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
-//
-// // Constructing an IntrusiveRefCntPtr has no effect on the object's
-// // refcount. After a move, the moved-from pointer is null.
-// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1));
-// assert(Ptr1 == nullptr);
-//
-// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1.
-// Ptr2.reset();
-//
-// // The object deletes itself when we return from the function, because
-// // Ptr3's destructor decrements its refcount to 0.
-// }
-//
-// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.:
-//
-// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass());
-// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required
-//
-// IntrusiveRefCntPtr works with any class that
-//
-// - inherits from (ThreadSafe)RefCountedBase,
-// - has Retain() and Release() methods, or
-// - specializes IntrusiveRefCntPtrInfo.
-//
+///
+/// \file
+/// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and
+/// IntrusiveRefCntPtr classes.
+///
+/// IntrusiveRefCntPtr is a smart pointer to an object which maintains a
+/// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a
+/// refcount member variable and methods for updating the refcount. An object
+/// that inherits from (ThreadSafe)RefCountedBase deletes itself when its
+/// refcount hits zero.
+///
+/// For example:
+///
+/// ```
+/// class MyClass : public RefCountedBase<MyClass> {};
+///
+/// void foo() {
+/// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount
+/// // by 1 (from 0 in this case).
+/// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
+///
+/// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
+/// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
+///
+/// // Move-constructing an IntrusiveRefCntPtr has no effect on the object's
+/// // refcount. After a move, the moved-from pointer is null.
+/// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1));
+/// assert(Ptr1 == nullptr);
+///
+/// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1.
+/// Ptr2.reset();
+///
+/// // The object deletes itself when we return from the function, because
+/// // Ptr3's destructor decrements its refcount to 0.
+/// }
+/// ```
+///
+/// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.:
+///
+/// ```
+/// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass());
+/// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required
+/// ```
+///
+/// IntrusiveRefCntPtr works with any class that
+///
+/// - inherits from (ThreadSafe)RefCountedBase,
+/// - has Retain() and Release() methods, or
+/// - specializes IntrusiveRefCntPtrInfo.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INTRUSIVEREFCNTPTR_H
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index d281166b3e19..c4e5c7e2bac5 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -5,12 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a map that provides insertion order iteration. The
-// interface is purposefully minimal. The key is assumed to be cheap to copy
-// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in
-// a std::vector.
-//
+///
+/// \file
+/// This file implements a map that provides insertion order iteration. The
+/// interface is purposefully minimal. The key is assumed to be cheap to copy
+/// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in
+/// a std::vector.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_MAPVECTOR_H
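A short illustrative sketch of the insertion-order guarantee:

```
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

void mapVectorExample() {
  MapVector<StringRef, int> MV;
  MV.insert({"b", 2});
  MV.insert({"a", 1});
  MV["c"] = 3;

  // Iteration visits entries in insertion order: b, a, c (not sorted order).
  for (auto &KV : MV)
    (void)KV;
}
```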
diff --git a/llvm/include/llvm/ADT/None.h b/llvm/include/llvm/ADT/None.h
index 004ca0ac50ac..1a66be4097df 100644
--- a/llvm/include/llvm/ADT/None.h
+++ b/llvm/include/llvm/ADT/None.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides None, an enumerator for use in implicit constructors
-// of various (usually templated) types to make such construction more
-// terse.
-//
+///
+/// \file
+/// This file provides None, an enumerator for use in implicit constructors
+/// of various (usually templated) types to make such construction more
+/// terse.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_NONE_H
diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h
index 7d6b3e92f6b2..e047b0fc6514 100644
--- a/llvm/include/llvm/ADT/Optional.h
+++ b/llvm/include/llvm/ADT/Optional.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides Optional, a template class modeled in the spirit of
-// OCaml's 'opt' variant. The idea is to strongly type whether or not
-// a value can be optional.
-//
+///
+/// \file
+/// This file provides Optional, a template class modeled in the spirit of
+/// OCaml's 'opt' variant. The idea is to strongly type whether or not
+/// a value can be optional.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_OPTIONAL_H
@@ -241,7 +242,7 @@ template <typename T> class Optional {
public:
using value_type = T;
- constexpr Optional() {}
+ constexpr Optional() = default;
constexpr Optional(NoneType) {}
constexpr Optional(const T &y) : Storage(in_place, y) {}
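A minimal sketch of the strong typing described above (parseDigit is a made-up helper):

```
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
using namespace llvm;

Optional<int> parseDigit(char C) {
  if (C < '0' || C > '9')
    return None;  // Explicitly "no value".
  return C - '0'; // Implicitly wraps the value.
}

void optionalExample() {
  if (Optional<int> D = parseDigit('7'))
    (void)*D; // Dereference only when a value is present.
}
```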
diff --git a/llvm/include/llvm/ADT/PackedVector.h b/llvm/include/llvm/ADT/PackedVector.h
index ae7f8cc85743..b448685ab616 100644
--- a/llvm/include/llvm/ADT/PackedVector.h
+++ b/llvm/include/llvm/ADT/PackedVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the PackedVector class.
-//
+///
+/// \file
+/// This file implements the PackedVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_PACKEDVECTOR_H
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index 393ace6b70fc..b7ddf8855605 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PointerIntPair class.
-//
+///
+/// \file
+/// This file defines the PointerIntPair class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POINTERINTPAIR_H
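For readers new to the class, a minimal illustrative sketch; the integer is packed into the unused low bits of the aligned pointer:

```
#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

void pointerIntPairExample(int *P) {
  // One bool stashed in the low bit of the pointer; still one word total.
  PointerIntPair<int *, 1, bool> PIP(P, true);
  int *Ptr = PIP.getPointer();
  bool Flag = PIP.getInt();
  PIP.setInt(false);
  (void)Ptr; (void)Flag;
}
```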
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index 5ce2dbee4b3a..04d566bbc75e 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PointerUnion class, which is a discriminated union of
-// pointer types.
-//
+///
+/// \file
+/// This file defines the PointerUnion class, which is a discriminated union of
+/// pointer types.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POINTERUNION_H
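A minimal illustrative sketch of the discriminated-union interface:

```
#include "llvm/ADT/PointerUnion.h"
using namespace llvm;

void pointerUnionExample(int *I, float *F) {
  PointerUnion<int *, float *> P = I; // The discriminator lives in spare bits.
  if (P.is<int *>())
    (void)P.get<int *>();             // Checked access to the active member.
  if (float *FP = P.dyn_cast<float *>())
    (void)FP;                         // Null here, since P holds an int *.
  P = F;                              // Re-assignment switches the tag.
}
```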
diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index 74314d39d825..d0366045fa09 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build a generic graph
-// post order iterator. This should work over any graph type that has a
-// GraphTraits specialization.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to provide a generic graph
+/// post order iterator. This should work over any graph type that has a
+/// GraphTraits specialization.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POSTORDERITERATOR_H
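A minimal sketch over LLVM's own CFG, which already provides the required GraphTraits specialization:

```
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
using namespace llvm;

void postOrderExample(Function &F) {
  // Successors are visited before their predecessors (ignoring back edges).
  for (BasicBlock *BB : post_order(&F))
    (void)BB;
}
```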
diff --git a/llvm/include/llvm/ADT/PriorityQueue.h b/llvm/include/llvm/ADT/PriorityQueue.h
index cf79ee10ba7f..f40c160f0f5e 100644
--- a/llvm/include/llvm/ADT/PriorityQueue.h
+++ b/llvm/include/llvm/ADT/PriorityQueue.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PriorityQueue class.
-//
+///
+/// \file
+/// This file defines the PriorityQueue class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_PRIORITYQUEUE_H
diff --git a/llvm/include/llvm/ADT/STLArrayExtras.h b/llvm/include/llvm/ADT/STLArrayExtras.h
new file mode 100644
index 000000000000..5b666641580e
--- /dev/null
+++ b/llvm/include/llvm/ADT/STLArrayExtras.h
@@ -0,0 +1,35 @@
+//===- llvm/ADT/STLArrayExtras.h - additions to <array> ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some templates that are useful when working with
+// C-style arrays.
+//
+// No library is required when using these functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STLARRAYEXTRAS_H
+#define LLVM_ADT_STLARRAYEXTRAS_H
+
+#include <cstddef>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Extra additions for arrays
+//===----------------------------------------------------------------------===//
+
+/// Find the length of an array.
+template <class T, std::size_t N>
+constexpr inline size_t array_lengthof(T (&)[N]) {
+ return N;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_STLARRAYEXTRAS_H
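Typical usage of the relocated helper (illustrative):

```
#include "llvm/ADT/STLArrayExtras.h"

static const char *const Names[] = {"add", "sub", "mul"};

void arrayLengthofExample() {
  // The length is deduced from the array type at compile time.
  constexpr size_t N = llvm::array_lengthof(Names);
  static_assert(N == 3, "three names");
}
```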
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index c3200c926518..e2972f4f902a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -5,21 +5,23 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains some templates that are useful if you are working with the
-// STL at all.
-//
-// No library is required when using these functions.
-//
+///
+/// \file
+/// This file contains some templates that are useful if you are working with
+/// the STL at all.
+///
+/// No library is required when using these functions.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STLEXTRAS_H
#define LLVM_ADT_STLEXTRAS_H
-#include "llvm/ADT/identity.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/identity.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/abi-breaking.h"
@@ -1410,7 +1412,7 @@ constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) {
}
//===----------------------------------------------------------------------===//
-// Extra additions for arrays
+// Extra additions to <algorithm>
//===----------------------------------------------------------------------===//
// We have a copy here so that LLVM behaves the same when using different
@@ -1430,12 +1432,6 @@ void shuffle(Iterator first, Iterator last, RNG &&g) {
}
}
-/// Find the length of an array.
-template <class T, std::size_t N>
-constexpr inline size_t array_lengthof(T (&)[N]) {
- return N;
-}
-
/// Adapt std::less<T> for array_pod_sort.
template<typename T>
inline int array_pod_sort_comparator(const void *P1, const void *P2) {
@@ -1563,10 +1559,6 @@ inline void sort(Container &&C, Compare Comp) {
llvm::sort(adl_begin(C), adl_end(C), Comp);
}
-//===----------------------------------------------------------------------===//
-// Extra additions to <algorithm>
-//===----------------------------------------------------------------------===//
-
/// Get the size of a range. This is a wrapper function around std::distance
/// which is only enabled when the operation is O(1).
template <typename R>
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 440b29df260c..0aa577d3ee1a 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -5,12 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains library features backported from future STL versions.
-//
-// These should be replaced with their STL counterparts as the C++ version LLVM
-// is compiled with is updated.
-//
+///
+/// \file
+/// This file contains library features backported from future STL versions.
+///
+/// These should be replaced with their STL counterparts as LLVM adopts newer
+/// versions of the C++ standard.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STLFORWARDCOMPAT_H
diff --git a/llvm/include/llvm/ADT/ScopeExit.h b/llvm/include/llvm/ADT/ScopeExit.h
index 61618818bae5..7f013f3f7979 100644
--- a/llvm/include/llvm/ADT/ScopeExit.h
+++ b/llvm/include/llvm/ADT/ScopeExit.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the make_scope_exit function, which executes user-defined
-// cleanup logic at scope exit.
-//
+///
+/// \file
+/// This file defines the make_scope_exit function, which executes user-defined
+/// cleanup logic at scope exit.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SCOPEEXIT_H
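A minimal illustrative sketch of make_scope_exit:

```
#include "llvm/ADT/ScopeExit.h"
#include <cstdio>

void scopeExitExample() {
  FILE *F = std::fopen("/tmp/demo.txt", "w");
  if (!F)
    return;
  // The lambda runs when Close goes out of scope, on every exit path.
  auto Close = llvm::make_scope_exit([&] { std::fclose(F); });
  std::fputs("hello\n", F);
}
```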
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index 3e30b6bb83d3..c9462f077dc8 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines generic set operations that may be used on set's of
-// different types, and different element types.
-//
+///
+/// \file
+/// This file defines generic set operations that may be used on sets of
+/// different types and with different element types.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SETOPERATIONS_H
diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h
index 82d5e98afb5d..08cf42f0b210 100644
--- a/llvm/include/llvm/ADT/SetVector.h
+++ b/llvm/include/llvm/ADT/SetVector.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a set that has insertion order iteration
-// characteristics. This is useful for keeping a set of things that need to be
-// visited later but in a deterministic order (insertion order). The interface
-// is purposefully minimal.
-//
-// This file defines SetVector and SmallSetVector, which performs no allocations
-// if the SetVector has less than a certain number of elements.
-//
+///
+/// \file
+/// This file implements a set that has insertion order iteration
+/// characteristics. This is useful for keeping a set of things that need to be
+/// visited later but in a deterministic order (insertion order). The interface
+/// is purposefully minimal.
+///
+/// This file defines SetVector and SmallSetVector; the latter performs no
+/// allocations if it holds fewer than a certain number of elements.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SETVECTOR_H
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index 17be317a10d7..86e304cc6c02 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the SmallBitVector class.
-//
+///
+/// \file
+/// This file implements the SmallBitVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLBITVECTOR_H
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 981b741669b0..ef6dae68b4a6 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallPtrSet class. See the doxygen comment for
-// SmallPtrSetImplBase for more details on the algorithm used.
+///
+/// \file
+/// This file defines the SmallPtrSet class. See the doxygen comment for
+/// SmallPtrSetImplBase for more details on the algorithm used.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index fe4f74eac85d..0eed85449c9d 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallSet class.
-//
+///
+/// \file
+/// This file defines the SmallSet class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLSET_H
diff --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h
index 81243af1f97d..874968f0a13f 100644
--- a/llvm/include/llvm/ADT/SmallString.h
+++ b/llvm/include/llvm/ADT/SmallString.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallString class.
-//
+///
+/// \file
+/// This file defines the SmallString class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLSTRING_H
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 466acb83d466..a4a790323a6b 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallVector class.
-//
+///
+/// \file
+/// This file defines the SmallVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLVECTOR_H
@@ -567,6 +568,16 @@ protected:
explicit SmallVectorImpl(unsigned N)
: SmallVectorTemplateBase<T>(N) {}
+ void assignRemote(SmallVectorImpl &&RHS) {
+ this->destroy_range(this->begin(), this->end());
+ if (!this->isSmall())
+ free(this->begin());
+ this->BeginX = RHS.BeginX;
+ this->Size = RHS.Size;
+ this->Capacity = RHS.Capacity;
+ RHS.resetToSmall();
+ }
+
public:
SmallVectorImpl(const SmallVectorImpl &) = delete;
@@ -1031,12 +1042,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// If the RHS isn't small, clear this vector and then steal its buffer.
if (!RHS.isSmall()) {
- this->destroy_range(this->begin(), this->end());
- if (!this->isSmall()) free(this->begin());
- this->BeginX = RHS.BeginX;
- this->Size = RHS.Size;
- this->Capacity = RHS.Capacity;
- RHS.resetToSmall();
+ this->assignRemote(std::move(RHS));
return *this;
}
@@ -1227,7 +1233,20 @@ public:
}
SmallVector &operator=(SmallVector &&RHS) {
- SmallVectorImpl<T>::operator=(::std::move(RHS));
+ if (N) {
+ SmallVectorImpl<T>::operator=(::std::move(RHS));
+ return *this;
+ }
+    // SmallVectorImpl<T>::operator= does not leverage N == 0, so optimize
+    // that case here.
+ if (this == &RHS)
+ return *this;
+ if (RHS.empty()) {
+ this->destroy_range(this->begin(), this->end());
+ this->Size = 0;
+ } else {
+ this->assignRemote(std::move(RHS));
+ }
return *this;
}
diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h
index 12850e14f4ed..a591896521ce 100644
--- a/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/llvm/include/llvm/ADT/SparseBitVector.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseBitVector class. See the doxygen comment for
-// SparseBitVector for more details on the algorithm used.
-//
+///
+/// \file
+/// This file defines the SparseBitVector class. See the doxygen comment for
+/// SparseBitVector for more details on the algorithm used.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSEBITVECTOR_H
diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h
index f63cef936433..ef2a5ea5ed71 100644
--- a/llvm/include/llvm/ADT/SparseMultiSet.h
+++ b/llvm/include/llvm/ADT/SparseMultiSet.h
@@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseMultiSet class, which adds multiset behavior to
-// the SparseSet.
-//
-// A sparse multiset holds a small number of objects identified by integer keys
-// from a moderately sized universe. The sparse multiset uses more memory than
-// other containers in order to provide faster operations. Any key can map to
-// multiple values. A SparseMultiSetNode class is provided, which serves as a
-// convenient base class for the contents of a SparseMultiSet.
-//
+///
+/// \file
+/// This file defines the SparseMultiSet class, which adds multiset behavior to
+/// the SparseSet.
+///
+/// A sparse multiset holds a small number of objects identified by integer keys
+/// from a moderately sized universe. The sparse multiset uses more memory than
+/// other containers in order to provide faster operations. Any key can map to
+/// multiple values. A SparseMultiSetNode class is provided, which serves as a
+/// convenient base class for the contents of a SparseMultiSet.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSEMULTISET_H
diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h
index e66d76ad88e1..5c7087b1bffe 100644
--- a/llvm/include/llvm/ADT/SparseSet.h
+++ b/llvm/include/llvm/ADT/SparseSet.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseSet class derived from the version described in
-// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
-// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
-//
-// A sparse set holds a small number of objects identified by integer keys from
-// a moderately sized universe. The sparse set uses more memory than other
-// containers in order to provide faster operations.
-//
+///
+/// \file
+/// This file defines the SparseSet class derived from the version described in
+/// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
+/// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
+///
+/// A sparse set holds a small number of objects identified by integer keys from
+/// a moderately sized universe. The sparse set uses more memory than other
+/// containers in order to provide faster operations.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSESET_H
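A minimal illustrative sketch; note that setUniverse() must be called before inserting keys:

```
#include "llvm/ADT/SparseSet.h"
using namespace llvm;

void sparseSetExample() {
  SparseSet<unsigned> Set;
  Set.setUniverse(64); // Keys must be < 64; sizes the sparse array.
  Set.insert(5);
  Set.insert(17);
  if (Set.count(5))
    Set.erase(Set.find(5));
  Set.clear(); // Cost is proportional to the elements, not the universe.
}
```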
diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h
index 528d2cdcf61b..c39e161bcbcd 100644
--- a/llvm/include/llvm/ADT/Statistic.h
+++ b/llvm/include/llvm/ADT/Statistic.h
@@ -5,21 +5,22 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the 'Statistic' class, which is designed to be an easy way
-// to expose various metrics from passes. These statistics are printed at the
-// end of a run (from llvm_shutdown), when the -stats command line option is
-// passed on the command line.
-//
-// This is useful for reporting information like the number of instructions
-// simplified, optimized or removed by various transformations, like this:
-//
-// static Statistic NumInstsKilled("gcse", "Number of instructions killed");
-//
-// Later, in the code: ++NumInstsKilled;
-//
-// NOTE: Statistics *must* be declared as global variables.
-//
+///
+/// \file
+/// This file defines the 'Statistic' class, which is designed to be an easy way
+/// to expose various metrics from passes. These statistics are printed at the
+/// end of a run (from llvm_shutdown), when the -stats command line option is
+/// passed on the command line.
+///
+/// This is useful for reporting information such as the number of instructions
+/// simplified, optimized or removed by various transformations, for example:
+///
+/// static Statistic NumInstsKilled("gcse", "Number of instructions killed");
+///
+/// Later, in the code: ++NumInstsKilled;
+///
+/// NOTE: Statistics *must* be declared as global variables.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STATISTIC_H
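In practice most passes declare statistics through the STATISTIC macro from this header rather than spelling out the class; a minimal sketch (DEBUG_TYPE and the counter name are made up):

```
#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "gcse"

STATISTIC(NumInstsKilled, "Number of instructions killed");

void noteInstructionKilled() {
  ++NumInstsKilled; // Printed at shutdown when -stats is passed.
}
```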
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 81a0954226d6..ee6c33924e96 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains some functions that are useful when dealing with strings.
-//
+///
+/// \file
+/// This file contains some functions that are useful when dealing with strings.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGEXTRAS_H
@@ -148,13 +149,14 @@ inline char toUpper(char x) {
return x;
}
-inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
+inline std::string utohexstr(uint64_t X, bool LowerCase = false,
+ unsigned Width = 0) {
char Buffer[17];
char *BufPtr = std::end(Buffer);
if (X == 0) *--BufPtr = '0';
- while (X) {
+ for (unsigned i = 0; Width ? (i < Width) : X; ++i) {
unsigned char Mod = static_cast<unsigned char>(X) & 15;
*--BufPtr = hexdigit(Mod, LowerCase);
X >>= 4;
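The new Width parameter changes the loop's stopping condition: when Width is nonzero, exactly Width digits are emitted, zero-padded once X runs out; when Width is zero the old behavior is preserved. A quick illustrative sketch:

```
#include "llvm/ADT/StringExtras.h"
#include <cassert>

void utohexstrExample() {
  // Old behavior: as many digits as needed, uppercase by default.
  assert(llvm::utohexstr(0xABCu) == "ABC");
  // New behavior: a fixed number of digits, zero-padded.
  assert(llvm::utohexstr(0xABCu, /*LowerCase=*/true, /*Width=*/8) ==
         "00000abc");
}
```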
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index 562a2ff1a192..23248093c67e 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the StringMap class.
-//
+///
+/// \file
+/// This file defines the StringMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGMAP_H
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 120d4f3ca4bc..6e13c8618c85 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the StringMapEntry class - it is intended to be a low
-// dependency implementation detail of StringMap that is more suitable for
-// inclusion in public headers than StringMap.h itself is.
-//
+///
+/// \file
+/// This file defines the StringMapEntry class - it is intended to be a low
+/// dependency implementation detail of StringMap that is more suitable for
+/// inclusion in public headers than StringMap.h itself is.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGMAPENTRY_H
diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h
index c4245175544b..4a499463d983 100644
--- a/llvm/include/llvm/ADT/StringSet.h
+++ b/llvm/include/llvm/ADT/StringSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// StringSet - A set-like wrapper for the StringMap.
-//
+///
+/// \file
+/// StringSet - A set-like wrapper for the StringMap.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGSET_H
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index 4b7882d7ca10..95ab1df8d297 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -4,10 +4,11 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
-//
-// This file implements the StringSwitch template, which mimics a switch()
-// statement whose cases are string literals.
-//
+///
+/// \file
+/// This file implements the StringSwitch template, which mimics a switch()
+/// statement whose cases are string literals.
+///
//===----------------------------------------------------------------------===/
#ifndef LLVM_ADT_STRINGSWITCH_H
#define LLVM_ADT_STRINGSWITCH_H
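A minimal illustrative sketch (parseColor is a made-up helper):

```
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

int parseColor(StringRef S) {
  return StringSwitch<int>(S)
      .Case("red", 0)
      .Case("green", 1)
      .Cases("blue", "azure", 2) // Several strings mapping to one result.
      .Default(-1);
}
```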
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 0f0a7b08b5d3..42277c013035 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -721,6 +721,41 @@ public:
isOSBinFormatELF();
}
+ /// Tests whether the target is T32.
+ bool isArmT32() const {
+ switch (getSubArch()) {
+ case Triple::ARMSubArch_v8m_baseline:
+ case Triple::ARMSubArch_v7s:
+ case Triple::ARMSubArch_v7k:
+ case Triple::ARMSubArch_v7ve:
+ case Triple::ARMSubArch_v6:
+ case Triple::ARMSubArch_v6m:
+ case Triple::ARMSubArch_v6k:
+ case Triple::ARMSubArch_v6t2:
+ case Triple::ARMSubArch_v5:
+ case Triple::ARMSubArch_v5te:
+ case Triple::ARMSubArch_v4t:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ /// Tests whether the target is an M-class.
+ bool isArmMClass() const {
+ switch (getSubArch()) {
+ case Triple::ARMSubArch_v6m:
+ case Triple::ARMSubArch_v7m:
+ case Triple::ARMSubArch_v7em:
+ case Triple::ARMSubArch_v8m_mainline:
+ case Triple::ARMSubArch_v8m_baseline:
+ case Triple::ARMSubArch_v8_1m_mainline:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// Tests whether the target is AArch64 (little and big endian).
bool isAArch64() const {
return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be ||
diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 3b7598f3251d..892a7d43b317 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeSwitch template, which mimics a switch()
-// statement whose cases are type names.
-//
+///
+/// \file
+/// This file implements the TypeSwitch template, which mimics a switch()
+/// statement whose cases are type names.
+///
//===-----------------------------------------------------------------------===/
#ifndef LLVM_ADT_TYPESWITCH_H
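A minimal sketch against LLVM's instruction hierarchy, which already supports dyn_cast (classifyInst is a made-up helper):

```
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

StringRef classifyInst(Instruction *I) {
  return TypeSwitch<Instruction *, StringRef>(I)
      .Case<LoadInst>([](LoadInst *) { return StringRef("load"); })
      .Case<StoreInst>([](StoreInst *) { return StringRef("store"); })
      .Default([](Instruction *) { return StringRef("other"); });
}
```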
diff --git a/llvm/include/llvm/ADT/Waymarking.h b/llvm/include/llvm/ADT/Waymarking.h
deleted file mode 100644
index 2efbc6f05495..000000000000
--- a/llvm/include/llvm/ADT/Waymarking.h
+++ /dev/null
@@ -1,322 +0,0 @@
-//===- Waymarking.h - Array waymarking algorithm ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Utility to backtrace an array's head, from a pointer into it. For the
-// backtrace to work, we use "Waymarks", which are special tags embedded into
-// the array's elements.
-//
-// A Tag of n-bits (in size) is composed as follows:
-//
-// bits: | n-1 | n-2 ... 0 |
-// .---------.------------------------------------.
-// |Stop Mask|(2^(n-1))-ary numeric system - digit|
-// '---------'------------------------------------'
-//
-// Backtracing is done as follows:
-// Walk back (starting from a given pointer to an element into the array), until
-// a tag with a "Stop Mask" is reached. Then start calculating the "Offset" from
-// the array's head, by picking up digits along the way, until another stop is
-// reached. The "Offset" is then subtracted from the current pointer, and the
-// result is the array's head.
-// A special case - if we first encounter a Tag with a Stop and a zero digit,
-// then this is already the head.
-//
-// For example:
-// In case of 2 bits:
-//
-// Tags:
-// x0 - binary digit 0
-// x1 - binary digit 1
-// 1x - stop and calculate (s)
-//
-// Array:
-// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.
-// head -> |s0 |s1 | 0 |s1 | 0 | 0 |s1 | 1 | 1 |s1 | 0 | 1 | 0 |s1 | 0 | 1 |
-// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'
-// |-1 |-2 |-4 |-7 |-10 |-14
-// <_ | | | | | |
-// <_____ | | | | |
-// <_____________ | | | |
-// <_________________________ | | |
-// <_____________________________________ | |
-// <_____________________________________________________ |
-//
-//
-// In case of 3 bits:
-//
-// Tags:
-// x00 - quaternary digit 0
-// x01 - quaternary digit 1
-// x10 - quaternary digit 2
-// x11 - quaternary digit 3
-// 1xy - stop and calculate (s)
-//
-// Array:
-// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.
-// head -> |s0 |s1 |s2 |s3 | 0 |s1 | 2 |s1 | 0 |s2 | 2 |s2 | 0 |s3 | 2 |s3 |
-// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'
-// |-1 |-2 |-3 |-4 |-6 |-8 |-10 |-12 |-14 |-16
-// <_ | | | | | | | | | |
-// <_____ | | | | | | | | |
-// <_________ | | | | | | | |
-// <_____________ | | | | | | |
-// <_____________________ | | | | | |
-// <_____________________________ | | | | |
-// <_____________________________________ | | | |
-// <_____________________________________________ | | |
-// <_____________________________________________________ | |
-// <_____________________________________________________________ |
-//
-//
-// The API introduce 2 functions:
-// 1. fillWaymarks
-// 2. followWaymarks
-//
-// Example:
-// int N = 10;
-// int M = 5;
-// int **A = new int *[N + M]; // Define the array.
-// for (int I = 0; I < N + M; ++I)
-// A[I] = new int(I);
-//
-// fillWaymarks(A, A + N); // Set the waymarks for the first N elements
-// // of the array.
-// // Note that it must be done AFTER we fill
-// // the array's elements.
-//
-// ... // Elements which are not in the range
-// // [A, A+N) will not be marked, and we won't
-// // be able to call followWaymarks on them.
-//
-// ... // Elements which will be changed after the
-// // call to fillWaymarks, will have to be
-// // retagged.
-//
-// fillWaymarks(A + N, A + N + M, N); // Set the waymarks of the remaining M
-// // elements.
-// ...
-// int **It = A + N + 1;
-// int **B = followWaymarks(It); // Find the head of the array containing It.
-// assert(B == A);
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_WAYMARKING_H
-#define LLVM_ADT_WAYMARKING_H
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/PointerLikeTypeTraits.h"
-
-namespace llvm {
-
-namespace detail {
-
-template <unsigned NumBits> struct WaymarkingTraits {
- enum : unsigned {
- // The number of bits of a Waymarking Tag.
- NUM_BITS = NumBits,
-
- // A Tag is composed from a Mark and a Stop mask.
- MARK_SIZE = NUM_BITS - 1,
- STOP_MASK = (1 << MARK_SIZE),
- MARK_MASK = (STOP_MASK - 1),
- TAG_MASK = (MARK_MASK | STOP_MASK),
-
- // The number of pre-computed tags (for fast fill).
- NUM_STATIC_TAGS = 32
- };
-
-private:
- // Add a new tag, calculated from Count and Stop, to the Vals pack, while
- // continuing recursively to decrease Len down to 0.
- template <unsigned Len, bool Stop, unsigned Count, uint8_t... Vals>
- struct AddTag;
-
- // Delegate to the specialized AddTag according to the need of a Stop mask.
- template <unsigned Len, unsigned Count, uint8_t... Vals> struct GenTag {
- typedef
- typename AddTag<Len, (Count <= MARK_MASK), Count, Vals...>::Xdata Xdata;
- };
-
- // Start adding tags while calculating the next Count, which is actually the
- // number of already calculated tags (equivalent to the position in the
- // array).
- template <unsigned Len, uint8_t... Vals> struct GenOffset {
- typedef typename GenTag<Len, sizeof...(Vals), Vals...>::Xdata Xdata;
- };
-
- // Add the tag and remove it from Count.
- template <unsigned Len, unsigned Count, uint8_t... Vals>
- struct AddTag<Len, false, Count, Vals...> {
- typedef typename GenTag<Len - 1, (Count >> MARK_SIZE), Vals...,
- Count & MARK_MASK>::Xdata Xdata;
- };
-
- // We have reached the end of this Count, so start with a new Count.
- template <unsigned Len, unsigned Count, uint8_t... Vals>
- struct AddTag<Len, true, Count, Vals...> {
- typedef typename GenOffset<Len - 1, Vals...,
- (Count & MARK_MASK) | STOP_MASK>::Xdata Xdata;
- };
-
- template <unsigned Count, uint8_t... Vals> struct TagsData {
- // The remaining number for calculating the next tag, following the last one
- // in Values.
- static const unsigned Remain = Count;
-
- // The array of ordered pre-computed Tags.
- static const uint8_t Values[sizeof...(Vals)];
- };
-
- // Specialize the case when Len equals 0, as the recursion stop condition.
- template <unsigned Count, uint8_t... Vals>
- struct AddTag<0, false, Count, Vals...> {
- typedef TagsData<Count, Vals...> Xdata;
- };
-
- template <unsigned Count, uint8_t... Vals>
- struct AddTag<0, true, Count, Vals...> {
- typedef TagsData<Count, Vals...> Xdata;
- };
-
-public:
- typedef typename GenOffset<NUM_STATIC_TAGS>::Xdata Tags;
-};
-
-template <unsigned NumBits>
-template <unsigned Count, uint8_t... Vals>
-const uint8_t WaymarkingTraits<NumBits>::TagsData<
- Count, Vals...>::Values[sizeof...(Vals)] = {Vals...};
-
-} // end namespace detail
-
-/// This class is responsible for tagging (and retrieving the tag of) a given
-/// element of type T.
-template <class T, class WTraits = detail::WaymarkingTraits<
- PointerLikeTypeTraits<T>::NumLowBitsAvailable>>
-struct Waymarker {
- using Traits = WTraits;
- static void setWaymark(T &N, unsigned Tag) { N.setWaymark(Tag); }
- static unsigned getWaymark(const T &N) { return N.getWaymark(); }
-};
-
-template <class T, class WTraits> struct Waymarker<T *, WTraits> {
- using Traits = WTraits;
- static void setWaymark(T *&N, unsigned Tag) {
- reinterpret_cast<uintptr_t &>(N) |= static_cast<uintptr_t>(Tag);
- }
- static unsigned getWaymark(const T *N) {
- return static_cast<unsigned>(reinterpret_cast<uintptr_t>(N)) &
- Traits::TAG_MASK;
- }
-};
-
-/// Sets up the waymarking algorithm's tags for a given range [Begin, End).
-///
-/// \param Begin The beginning of the range to mark with tags (inclusive).
-/// \param End The ending of the range to mark with tags (exclusive).
-/// \param Offset The position in the supposed tags array from which to start
-/// marking the given range.
-template <class TIter, class Marker = Waymarker<
- typename std::iterator_traits<TIter>::value_type>>
-void fillWaymarks(TIter Begin, TIter End, size_t Offset = 0) {
- if (Begin == End)
- return;
-
- size_t Count = Marker::Traits::Tags::Remain;
- if (Offset <= Marker::Traits::NUM_STATIC_TAGS) {
- // Start by filling the pre-calculated tags, starting from the given offset.
- while (Offset != Marker::Traits::NUM_STATIC_TAGS) {
- Marker::setWaymark(*Begin, Marker::Traits::Tags::Values[Offset]);
-
- ++Offset;
- ++Begin;
-
- if (Begin == End)
- return;
- }
- } else {
- // The given offset is larger than the number of pre-computed tags, so we
- // must do it the hard way.
- // Calculate the next remaining Count, as if we have filled the tags up to
- // the given offset.
- size_t Off = Marker::Traits::NUM_STATIC_TAGS;
- do {
- ++Off;
-
- // If the count can fit into the tag, then the counting must stop.
- if (Count <= Marker::Traits::MARK_MASK) {
- Count = Off;
- } else
- Count >>= Marker::Traits::MARK_SIZE;
- } while (Off != Offset);
- }
-
- // By now, we have the matching remaining Count for the current offset.
- do {
- ++Offset;
-
- unsigned Tag = Count & Marker::Traits::MARK_MASK;
-
- // If the count can fit into the tag, then the counting must stop.
- if (Count <= Marker::Traits::MARK_MASK) {
- Tag |= Marker::Traits::STOP_MASK;
- Count = Offset;
- } else
- Count >>= Marker::Traits::MARK_SIZE;
-
- Marker::setWaymark(*Begin, Tag);
- ++Begin;
- } while (Begin != End);
-}
-
-/// Sets up the waymarking algorithm's tags for a given range.
-///
-/// \param Range The range to mark with tags.
-/// \param Offset The position in the supposed tags array from which to start
-/// marking the given range.
-template <typename R, class Marker = Waymarker<typename std::remove_reference<
- decltype(*std::begin(std::declval<R &>()))>::type>>
-void fillWaymarks(R &&Range, size_t Offset = 0) {
- return fillWaymarks<decltype(std::begin(std::declval<R &>())), Marker>(
- adl_begin(Range), adl_end(Range), Offset);
-}
-
-/// Retrieves the element marked with tag of only STOP_MASK, by following the
-/// waymarks. This is the first element in a range passed to a previous call to
-/// \c fillWaymarks with \c Offset 0.
-///
-/// For the trivial usage of calling \c fillWaymarks(Array), and \I is an
-/// iterator inside \c Array, this function retrieves the head of \c Array, by
-/// following the waymarks.
-///
-/// \param I The iterator into an array which was marked by the waymarking tags
-/// (by a previous call to \c fillWaymarks).
-template <class TIter, class Marker = Waymarker<
- typename std::iterator_traits<TIter>::value_type>>
-TIter followWaymarks(TIter I) {
- unsigned Tag;
- do
- Tag = Marker::getWaymark(*I--);
- while (!(Tag & Marker::Traits::STOP_MASK));
-
- // Special case for the first Use.
- if (Tag != Marker::Traits::STOP_MASK) {
- ptrdiff_t Offset = Tag & Marker::Traits::MARK_MASK;
- while (!((Tag = Marker::getWaymark(*I)) & Marker::Traits::STOP_MASK)) {
- Offset = (Offset << Marker::Traits::MARK_SIZE) + Tag;
- --I;
- }
- I -= Offset;
- }
- return ++I;
-}
-
-} // end namespace llvm
-
-#endif // LLVM_ADT_WAYMARKING_H
diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index d76bc6c6046c..49b27c89e5fe 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the C++20 <bit> header.
-//
+///
+/// \file
+/// This file implements the C++20 <bit> header.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BIT_H
diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h
index 4f5134008692..c480c1e7cd78 100644
--- a/llvm/include/llvm/ADT/edit_distance.h
+++ b/llvm/include/llvm/ADT/edit_distance.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a Levenshtein distance function that works for any two
-// sequences, with each element of each sequence being analogous to a character
-// in a string.
-//
+///
+/// \file
+/// This file defines a Levenshtein distance function that works for any two
+/// sequences, with each element of each sequence being analogous to a character
+/// in a string.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EDIT_DISTANCE_H
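A sketch of the generic entry point, ComputeEditDistance, applied to a non-string sequence (illustrative):

```
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/edit_distance.h"
#include <cassert>

void editDistanceExample() {
  int A[] = {1, 2, 3, 4};
  int B[] = {1, 3, 4, 5};
  // Delete the 2, then append the 5: two edits.
  unsigned D = llvm::ComputeEditDistance(llvm::makeArrayRef(A),
                                         llvm::makeArrayRef(B));
  assert(D == 2);
}
```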
diff --git a/llvm/include/llvm/ADT/ilist.h b/llvm/include/llvm/ADT/ilist.h
index b3aa26f2454d..9913b7cccbdd 100644
--- a/llvm/include/llvm/ADT/ilist.h
+++ b/llvm/include/llvm/ADT/ilist.h
@@ -5,19 +5,20 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines classes to implement an intrusive doubly linked list class
-// (i.e. each node of the list must contain a next and previous field for the
-// list.
-//
-// The ilist class itself should be a plug in replacement for list. This list
-// replacement does not provide a constant time size() method, so be careful to
-// use empty() when you really want to know if it's empty.
-//
-// The ilist class is implemented as a circular list. The list itself contains
-// a sentinel node, whose Next points at begin() and whose Prev points at
-// rbegin(). The sentinel node itself serves as end() and rend().
-//
+///
+/// \file
+/// This file defines classes to implement an intrusive doubly linked list class
+/// (i.e. each node of the list must contain a next and previous field for the
+/// list).
+///
+/// The ilist class itself should be a plug-in replacement for std::list. This list
+/// replacement does not provide a constant time size() method, so be careful to
+/// use empty() when you really want to know if it's empty.
+///
+/// The ilist class is implemented as a circular list. The list itself contains
+/// a sentinel node, whose Next points at begin() and whose Prev points at
+/// rbegin(). The sentinel node itself serves as end() and rend().
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ILIST_H
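A minimal illustrative sketch of the node/list pair this header and ilist_node.h provide; the ownership semantics are as I read them, with the list deleting any nodes it still holds:

```
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
using namespace llvm;

struct MyNode : ilist_node<MyNode> {
  int Value;
  explicit MyNode(int V) : Value(V) {}
};

void ilistExample() {
  ilist<MyNode> List;
  List.push_back(new MyNode(1)); // The list takes ownership.
  List.push_back(new MyNode(2));
  // Prefer empty() over size() == 0; size() is not constant time.
  if (!List.empty())
    (void)List.front().Value;
  List.clear(); // Deletes the remaining nodes.
}
```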
diff --git a/llvm/include/llvm/ADT/ilist_node.h b/llvm/include/llvm/ADT/ilist_node.h
index e040d9630a1e..7856b1c0d410 100644
--- a/llvm/include/llvm/ADT/ilist_node.h
+++ b/llvm/include/llvm/ADT/ilist_node.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ilist_node class template, which is a convenient
-// base class for creating classes that can be used with ilists.
-//
+///
+/// \file
+/// This file defines the ilist_node class template, which is a convenient
+/// base class for creating classes that can be used with ilists.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ILIST_NODE_H
diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 043b1b7ca2dc..2dd2e7ca916d 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -26,6 +26,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class AAResults;
diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h
index e16b908d6a10..539d29eb5e9c 100644
--- a/llvm/include/llvm/Analysis/CycleAnalysis.h
+++ b/llvm/include/llvm/Analysis/CycleAnalysis.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/GenericCycleInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/SSAContext.h"
+#include "llvm/Pass.h"
namespace llvm {
extern template class GenericCycleInfo<SSAContext>;
diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h
index 4ea589ec7efc..c5107da2a017 100644
--- a/llvm/include/llvm/Analysis/DDG.h
+++ b/llvm/include/llvm/Analysis/DDG.h
@@ -53,7 +53,7 @@ public:
DDGNode() = delete;
DDGNode(const NodeKind K) : Kind(K) {}
- DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+ DDGNode(const DDGNode &N) = default;
DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
virtual ~DDGNode() = 0;
@@ -93,7 +93,7 @@ public:
RootDDGNode() : DDGNode(NodeKind::Root) {}
RootDDGNode(const RootDDGNode &N) = delete;
RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
- ~RootDDGNode() {}
+ ~RootDDGNode() = default;
/// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
static bool classof(const DDGNode *N) {
@@ -113,11 +113,7 @@ public:
SimpleDDGNode(SimpleDDGNode &&N);
~SimpleDDGNode();
- SimpleDDGNode &operator=(const SimpleDDGNode &N) {
- DDGNode::operator=(N);
- InstList = N.InstList;
- return *this;
- }
+ SimpleDDGNode &operator=(const SimpleDDGNode &N) = default;
SimpleDDGNode &operator=(SimpleDDGNode &&N) {
DDGNode::operator=(std::move(N));
@@ -179,11 +175,7 @@ public:
PiBlockDDGNode(PiBlockDDGNode &&N);
~PiBlockDDGNode();
- PiBlockDDGNode &operator=(const PiBlockDDGNode &N) {
- DDGNode::operator=(N);
- NodeList = N.NodeList;
- return *this;
- }
+ PiBlockDDGNode &operator=(const PiBlockDDGNode &N) = default;
PiBlockDDGNode &operator=(PiBlockDDGNode &&N) {
DDGNode::operator=(std::move(N));
@@ -231,11 +223,7 @@ public:
DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
- DDGEdge &operator=(const DDGEdge &E) {
- DDGEdgeBase::operator=(E);
- Kind = E.Kind;
- return *this;
- }
+ DDGEdge &operator=(const DDGEdge &E) = default;
DDGEdge &operator=(DDGEdge &&E) {
DDGEdgeBase::operator=(std::move(E));
@@ -272,7 +260,7 @@ public:
: Name(N), DI(DepInfo), Root(nullptr) {}
DependenceGraphInfo(DependenceGraphInfo &&G)
: Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
- virtual ~DependenceGraphInfo() {}
+ virtual ~DependenceGraphInfo() = default;
/// Return the label that is used to name this graph.
StringRef getName() const { return Name; }
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 8c852e85b04a..638f4869d677 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -76,7 +76,7 @@ namespace llvm {
public:
Dependence(Instruction *Source, Instruction *Destination)
: Src(Source), Dst(Destination) {}
- virtual ~Dependence() {}
+ virtual ~Dependence() = default;
/// Dependence::DVEntry - Each level in the distance/direction vector
/// has a direction (or perhaps a union of several directions), and
diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
index 332829cbc8a9..e0dbdcdaa749 100644
--- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -43,7 +43,7 @@ public:
AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
const BasicBlockListType &BBs)
: Graph(G), DI(D), BBList(BBs) {}
- virtual ~AbstractDependenceGraphBuilder() {}
+ virtual ~AbstractDependenceGraphBuilder() = default;
/// The main entry to the graph construction algorithm. It starts by
/// creating nodes in increasing order of granularity and then
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 7b81d5754930..90ab2833e428 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -262,7 +262,20 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()),
llvm::hash_value(ID.getPredicate()),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
- else if (isa<CallInst>(ID.Inst)) {
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
+    // To hash intrinsics, we use the opcode and types like the other
+    // instructions, but also the intrinsic ID and the name of the
+    // intrinsic.
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ return llvm::hash_combine(
+ llvm::hash_value(ID.Inst->getOpcode()),
+ llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
+ llvm::hash_value(*ID.CalleeName),
+ llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+ }
+
+ if (isa<CallInst>(ID.Inst)) {
std::string FunctionName = *ID.CalleeName;
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
@@ -270,6 +283,7 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
}
+
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
llvm::hash_value(ID.Inst->getType()),
@@ -499,7 +513,7 @@ struct IRInstructionMapper {
/// be analyzed for similarity.
struct InstructionClassification
: public InstVisitor<InstructionClassification, InstrType> {
- InstructionClassification() {}
+ InstructionClassification() = default;
// TODO: Determine a scheme to resolve when the label is similar enough.
InstrType visitBranchInst(BranchInst &BI) {
@@ -525,8 +539,17 @@ struct IRInstructionMapper {
// analyzed for similarity as it has no bearing on the outcome of the
// program.
InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
- // TODO: Handle specific intrinsics.
- InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+ InstrType visitIntrinsicInst(IntrinsicInst &II) {
+ // These are disabled due to complications in the CodeExtractor when
+    // outlining these instructions. For instance, it is unclear what we
+    // should do when moving only the start or end lifetime instruction into
+    // an outlined function. Also, assume-like intrinsics could be removed
+    // from the region, removing arguments and causing discrepancies in the
+    // number of inputs between different regions.
+ if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+ return Illegal;
+ return EnableIntrinsics ? Legal : Illegal;
+ }
// We only allow call instructions where the function has a name and
// is not an indirect call.
InstrType visitCallInst(CallInst &CI) {
@@ -553,6 +576,10 @@ struct IRInstructionMapper {
// The flag variable that lets the classifier know whether we should
// allow indirect calls to be considered legal instructions.
bool EnableIndirectCalls = false;
+
+ // Flag that lets the classifier know whether we should allow intrinsics to
+ // be checked for similarity.
+ bool EnableIntrinsics = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -939,10 +966,12 @@ class IRSimilarityIdentifier {
public:
IRSimilarityIdentifier(bool MatchBranches = true,
bool MatchIndirectCalls = true,
- bool MatchCallsWithName = false)
+ bool MatchCallsWithName = false,
+ bool MatchIntrinsics = true)
: Mapper(&InstDataAllocator, &InstDataListAllocator),
EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
- EnableMatchingCallsByName(MatchCallsWithName) {}
+ EnableMatchingCallsByName(MatchCallsWithName),
+ EnableIntrinsics(MatchIntrinsics) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
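
[Editor's note: a hedged sketch of the widened constructor; the flag values
are illustrative.]

// All four matching dimensions spelled out; MatchIntrinsics is the new one
// and defaults to true when omitted.
IRSimilarityIdentifier Identifier(/*MatchBranches=*/true,
                                  /*MatchIndirectCalls=*/true,
                                  /*MatchCallsWithName=*/false,
                                  /*MatchIntrinsics=*/false);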
@@ -1031,6 +1060,10 @@ private:
/// convention, attributes and type signature.
bool EnableMatchingCallsByName = true;
+ /// The flag variable that marks whether we should check intrinsics for
+ /// similarity.
+ bool EnableIntrinsics = true;
+
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index eb72f2c5d14d..0825e19ecd2d 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -19,7 +19,7 @@ namespace llvm {
// Visitor class that finds all indirect calls.
struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
std::vector<CallBase *> IndirectCalls;
- PGOIndirectCallVisitor() {}
+ PGOIndirectCallVisitor() = default;
void visitCallBase(CallBase &Call) {
if (Call.isIndirectCall())
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
index feefa9b9ddd1..84252bcf1b06 100644
--- a/llvm/include/llvm/Analysis/InlineOrder.h
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -26,7 +26,7 @@ public:
using reference = T &;
using const_reference = const T &;
- virtual ~InlineOrder() {}
+ virtual ~InlineOrder() = default;
virtual size_t size() = 0;
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index eb8f66bada59..c0404d37d04d 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -1203,7 +1203,7 @@ private:
}
};
-inline LazyCallGraph::Edge::Edge() {}
+inline LazyCallGraph::Edge::Edge() = default;
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
inline LazyCallGraph::Edge::operator bool() const {
diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index 57f732cc854b..754391e10630 100644
--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -38,7 +38,7 @@ class LazyValueInfo {
void operator=(const LazyValueInfo&) = delete;
public:
~LazyValueInfo();
- LazyValueInfo() {}
+ LazyValueInfo() = default;
LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_,
TargetLibraryInfo *TLI_)
: AC(AC_), DL(DL_), TLI(TLI_) {}
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 3db501c51a17..09bf98d324ed 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -42,8 +42,7 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- MaybeAlign Alignment,
- const DataLayout &DL,
+ Align Alignment, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
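
[Editor's note: a hedged call-site sketch. With the MaybeAlign parameter gone,
callers must supply a concrete alignment; Ptr, LoadTy, DL, and I are assumed
to be in scope.]

bool CanSpeculate = isDereferenceableAndAlignedPointer(
    Ptr, LoadTy, Align(4), DL, /*CtxI=*/&I, /*DT=*/nullptr, /*TLI=*/nullptr);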
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index b2326c4714dd..a0ffdb07a7ec 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -535,7 +535,7 @@ public:
DebugLoc End;
public:
- LocRange() {}
+ LocRange() = default;
LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
@@ -900,7 +900,7 @@ template <class BlockT, class LoopT> class LoopInfoBase {
LoopInfoBase(const LoopInfoBase &) = delete;
public:
- LoopInfoBase() {}
+ LoopInfoBase() = default;
~LoopInfoBase() { releaseMemory(); }
LoopInfoBase(LoopInfoBase &&Arg)
@@ -1092,7 +1092,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
LoopInfo(const LoopInfo &) = delete;
public:
- LoopInfo() {}
+ LoopInfo() = default;
explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
@@ -1336,6 +1336,10 @@ bool hasMustProgress(const Loop *L);
/// be infinite without side effects without also being undefined)
bool isMustProgress(const Loop *L);
+/// Return true if this loop can be assumed to run for a finite number of
+/// iterations.
+bool isFinite(const Loop *L);
+
/// Return whether an MDNode might represent an access group.
///
/// Access group metadata nodes have to be distinct and empty. Being
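
[Editor's note: a hedged sketch of how a transform might consult the new
query; L is assumed to be in scope.]

// isFinite complements isMustProgress/hasMustProgress: it asks directly
// whether the loop is known to run a bounded number of iterations.
if (isFinite(L)) {
  // It is now sound to reason as if the loop terminates.
}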
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 05411d9c99a2..b1a81d5e7030 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -15,6 +15,7 @@
#include "llvm/IR/PassManager.h"
#include <deque>
+#include <map>
#include <memory>
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index cb522cf731d3..feb22c250979 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -23,6 +23,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h
index df489aaa534d..18a0bfee5730 100644
--- a/llvm/include/llvm/Analysis/MustExecute.h
+++ b/llvm/include/llvm/Analysis/MustExecute.h
@@ -281,9 +281,7 @@ struct MustBeExecutedIterator {
using ExplorerTy = MustBeExecutedContextExplorer;
- MustBeExecutedIterator(const MustBeExecutedIterator &Other)
- : Visited(Other.Visited), Explorer(Other.Explorer),
- CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
+ MustBeExecutedIterator(const MustBeExecutedIterator &Other) = default;
MustBeExecutedIterator(MustBeExecutedIterator &&Other)
: Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
@@ -299,7 +297,7 @@ struct MustBeExecutedIterator {
return *this;
}
- ~MustBeExecutedIterator() {}
+ ~MustBeExecutedIterator() = default;
/// Pre- and post-increment operators.
///{
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 1d330ca58a87..385fa5422926 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -42,7 +42,7 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) {
/// which is the address of the ARC runtime function.
inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) {
auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall);
- if (!B.hasValue() || B->Inputs.size() == 0)
+ if (!B)
return None;
return cast<Function>(B->Inputs[0]);
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 1e6dac44cf2b..b16aa7017719 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1111,9 +1111,11 @@ public:
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
/// iff any changes were made. If the operands are provably equal or
/// unequal, LHS and RHS are set to the same value and Pred is set to either
- /// ICMP_EQ or ICMP_NE.
+ /// ICMP_EQ or ICMP_NE. ControllingFiniteLoop is set if this comparison
+ /// controls the exit of a loop known to have a finite number of iterations.
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS,
- const SCEV *&RHS, unsigned Depth = 0);
+ const SCEV *&RHS, unsigned Depth = 0,
+ bool ControllingFiniteLoop = false);
/// Return the "disposition" of the given SCEV with respect to the given
/// loop.
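
[Editor's note: a hedged caller-side sketch; SE, Pred, LHS, and RHS are
assumed in scope. The new flag is only sound when the comparison is the
controlling exit condition of a loop already known to be finite.]

if (SE.SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
                            /*ControllingFiniteLoop=*/true)) {
  // Pred, LHS, and RHS were canonicalized (possibly to ICMP_EQ/ICMP_NE).
}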
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 27c58c0afa8a..6eb6d5518a41 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H
#define LLVM_ANALYSIS_SPARSEPROPAGATION_H
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include <set>
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 6e3e1380535e..17d1e3f770c1 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -254,15 +254,10 @@ public:
}
// Provide value semantics.
- TargetLibraryInfo(const TargetLibraryInfo &TLI)
- : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
+ TargetLibraryInfo(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo(TargetLibraryInfo &&TLI)
: Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
- TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) {
- Impl = TLI.Impl;
- OverrideAsUnavailable = TLI.OverrideAsUnavailable;
- return *this;
- }
+ TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) {
Impl = TLI.Impl;
OverrideAsUnavailable = TLI.OverrideAsUnavailable;
@@ -445,7 +440,7 @@ public:
///
/// This will use the module's triple to construct the library info for that
/// module.
- TargetLibraryAnalysis() {}
+ TargetLibraryAnalysis() = default;
/// Construct a library analysis with baseline Module-level info.
///
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 34ef9cc61c4f..7412e050322e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1789,7 +1789,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
public:
Model(T Impl) : Impl(std::move(Impl)) {}
- ~Model() override {}
+ ~Model() override = default;
const DataLayout &getDataLayout() const override {
return Impl.getDataLayout();
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4b9ef7c57ffc..a32744f8d58b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -42,8 +42,7 @@ protected:
public:
// Provide value semantics. MSVC requires that we spell all of these out.
- TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
- : DL(Arg.DL) {}
+ TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
const DataLayout &getDataLayout() const { return DL; }
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 8840929174d6..5d3b1270b538 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -372,7 +372,8 @@ enum {
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
- ELFABIVERSION_AMDGPU_HSA_V4 = 2
+ ELFABIVERSION_AMDGPU_HSA_V4 = 2,
+ ELFABIVERSION_AMDGPU_HSA_V5 = 3
};
#define ELF_RELOC(name, value) name = value,
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
index 6d7aca89ee5b..448c7a4e0034 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -218,7 +218,7 @@ private:
/// A DocNode that is a map.
class MapDocNode : public DocNode {
public:
- MapDocNode() {}
+ MapDocNode() = default;
MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
// Map access methods.
@@ -248,7 +248,7 @@ public:
/// A DocNode that is an array.
class ArrayDocNode : public DocNode {
public:
- ArrayDocNode() {}
+ ArrayDocNode() = default;
ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
// Array access methods.
diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
new file mode 100644
index 000000000000..6160e2551432
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.def
@@ -0,0 +1,26 @@
+//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for running through Swift enumerators.
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_SWIFT_SECTION)
+#error "Missing macro definition of HANDLE_SWIFT_SECTION"
+#endif
+
+HANDLE_SWIFT_SECTION(fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
+HANDLE_SWIFT_SECTION(assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
+HANDLE_SWIFT_SECTION(builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
+HANDLE_SWIFT_SECTION(capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
+HANDLE_SWIFT_SECTION(typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
+HANDLE_SWIFT_SECTION(reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
new file mode 100644
index 000000000000..68c04f11196e
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.h
@@ -0,0 +1,24 @@
+//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef LLVM_BINARYFORMAT_SWIFT_H
+#define LLVM_BINARYFORMAT_SWIFT_H
+
+namespace llvm {
+namespace binaryformat {
+
+enum Swift5ReflectionSectionKind {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
+#include "llvm/BinaryFormat/Swift.def"
+#undef HANDLE_SWIFT_SECTION
+ unknown,
+ last = unknown
+};
+} // end of namespace binaryformat
+} // end of namespace llvm
+
+#endif
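
[Editor's note: a hedged sketch of the intended X-macro pattern; the helper
name is illustrative, not part of this patch.]

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Swift.h"

// Hypothetical helper: expand Swift.def once to map a reflection section
// kind to its Mach-O section name (the ELF and COFF columns are unused here).
inline llvm::StringRef getMachOSectionName(
    llvm::binaryformat::Swift5ReflectionSectionKind Kind) {
  switch (Kind) {
#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
  case llvm::binaryformat::KIND:                                               \
    return MACHO;
#include "llvm/BinaryFormat/Swift.def"
#undef HANDLE_SWIFT_SECTION
  default:
    return "";
  }
}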
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 7ad2d37a2a35..96f25fce8ddb 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -139,7 +139,7 @@ class raw_ostream;
///
/// ModHash is for use in ThinLTO incremental build, generated while the IR
/// bitcode file writing.
- void WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
+ void writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash);
@@ -148,7 +148,7 @@ class raw_ostream;
/// writing the combined index file for ThinLTO. When writing a subset of the
/// index for a distributed backend, provide the \p ModuleToSummariesForIndex
/// map.
- void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
+ void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr);
@@ -161,7 +161,7 @@ class raw_ostream;
/// If EmbedCmdline is set, the command line is also exported in
/// the corresponding section (__LLVM,_cmdline / .llvmcmd) - even if CmdArgs
/// were empty.
- void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
+ void embedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
bool EmbedCmdline,
const std::vector<uint8_t> &CmdArgs);
diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h
index 0393d1a51866..37b7c4d73cff 100644
--- a/llvm/include/llvm/Bitstream/BitstreamReader.h
+++ b/llvm/include/llvm/Bitstream/BitstreamReader.h
@@ -20,8 +20,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <algorithm>
#include <cassert>
#include <climits>
diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h
index 32df448b91a1..7f7372630dbe 100644
--- a/llvm/include/llvm/CodeGen/DIE.h
+++ b/llvm/include/llvm/CodeGen/DIE.h
@@ -886,8 +886,8 @@ class DIEUnit {
DIE Die;
/// The section this unit will be emitted in. This may or may not be set to
/// a valid section depending on the client that is emitting DWARF.
- MCSection *Section;
- uint64_t Offset; /// .debug_info or .debug_types absolute section offset.
+ MCSection *Section = nullptr;
+  uint64_t Offset = 0; ///< .debug_info or .debug_types absolute section offset.
protected:
virtual ~DIEUnit() = default;
diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 9c7e688da6a7..775698a66ada 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -217,12 +217,12 @@ protected:
/// for use in the current block. It resets to EmitStartPt when it makes sense
/// (for example, it's usually profitable to avoid function calls between the
/// definition and the use)
- MachineInstr *LastLocalValue;
+ MachineInstr *LastLocalValue = nullptr;
/// The top most instruction in the current block that is allowed for
/// emitting local variables. LastLocalValue resets to EmitStartPt when it
/// makes sense (for example, on function calls)
- MachineInstr *EmitStartPt;
+ MachineInstr *EmitStartPt = nullptr;
public:
virtual ~FastISel();
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 3a4b3ee18e1b..f9663fadb868 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -95,7 +95,7 @@ public:
bool IsFixed = true)
: ArgInfo(Regs, OrigValue.getType(), OrigIndex, Flags, IsFixed, &OrigValue) {}
- ArgInfo() {}
+ ArgInfo() = default;
};
struct CallLoweringInfo {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index 79d71b2c8982..70945fcecfe5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -30,7 +30,7 @@ class GISelChangeObserver {
SmallPtrSet<MachineInstr *, 4> ChangingAllUsesOfReg;
public:
- virtual ~GISelChangeObserver() {}
+ virtual ~GISelChangeObserver() = default;
/// An instruction is about to be erased.
virtual void erasingInstr(MachineInstr &MI) = 0;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
index f6704df3f49d..3cacdc99dbf8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
@@ -465,7 +465,7 @@ private:
ScalarSizeChangeStrategies[LastOp - FirstOp + 1];
SmallVector<SizeChangeStrategy, 1>
VectorElementSizeChangeStrategies[LastOp - FirstOp + 1];
- bool TablesInitialized;
+ bool TablesInitialized = false;
// Data structures used by getAction:
SmallVector<SizeAndActionsVec, 1> ScalarActions[LastOp - FirstOp + 1];
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 9507c3411b5c..17cb53dd2d5b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -403,9 +403,9 @@ public:
class LegalizeRuleSet {
/// When non-zero, the opcode we are an alias of
- unsigned AliasOf;
+ unsigned AliasOf = 0;
/// If true, there is another opcode that aliases this one
- bool IsAliasedByAnother;
+ bool IsAliasedByAnother = false;
SmallVector<LegalizeRule, 2> Rules;
#ifndef NDEBUG
@@ -432,16 +432,6 @@ class LegalizeRuleSet {
return TypeIdx;
}
- unsigned immIdx(unsigned ImmIdx) {
- assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
- MCOI::OPERAND_FIRST_GENERIC_IMM) &&
- "Imm Index is out of bounds");
-#ifndef NDEBUG
- ImmIdxsCovered.set(ImmIdx);
-#endif
- return ImmIdx;
- }
-
void markAllIdxsAsCovered() {
#ifndef NDEBUG
TypeIdxsCovered.set();
@@ -556,7 +546,7 @@ class LegalizeRuleSet {
}
public:
- LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false) {}
+ LegalizeRuleSet() = default;
bool isAliasedByAnother() { return IsAliasedByAnother; }
void setIsAliasedByAnother() { IsAliasedByAnother = true; }
@@ -568,6 +558,16 @@ public:
}
unsigned getAlias() const { return AliasOf; }
+ unsigned immIdx(unsigned ImmIdx) {
+ assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM) &&
+ "Imm Index is out of bounds");
+#ifndef NDEBUG
+ ImmIdxsCovered.set(ImmIdx);
+#endif
+ return ImmIdx;
+ }
+
/// The instruction is legal if predicate is true.
LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that the free-form
@@ -824,11 +824,22 @@ public:
LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
return actionForCartesianProduct(LegalizeAction::Custom, Types);
}
+ /// The instruction is custom when type indexes 0 and 1 are both in their
+ /// respective lists.
LegalizeRuleSet &
customForCartesianProduct(std::initializer_list<LLT> Types0,
std::initializer_list<LLT> Types1) {
return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
}
+  /// The instruction is custom when type indexes 0, 1, and 2 are all in
+ /// their respective lists.
+ LegalizeRuleSet &
+ customForCartesianProduct(std::initializer_list<LLT> Types0,
+ std::initializer_list<LLT> Types1,
+ std::initializer_list<LLT> Types2) {
+ return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1,
+ Types2);
+ }
/// Unconditionally custom lower.
LegalizeRuleSet &custom() {
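
[Editor's note: a hedged sketch of the new three-list overload inside a
target's LegalizerInfo constructor; the opcode and LLT sets are illustrative
only.]

// Marks the action Custom for every (type0, type1, type2) combination drawn
// from the three lists.
getActionDefinitionsBuilder(TargetOpcode::G_INTRINSIC)
    .customForCartesianProduct({s32, s64}, {p0}, {s1});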
diff --git a/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
index 8593f54f3961..06512f2dc560 100644
--- a/llvm/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
@@ -24,10 +24,10 @@ class DataLayout;
class IntrinsicLowering {
const DataLayout &DL;
- bool Warned;
+ bool Warned = false;
public:
- explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {}
+ explicit IntrinsicLowering(const DataLayout &DL) : DL(DL) {}
/// Replace a call to the specified intrinsic function.
/// If an intrinsic function must be implemented by the code generator
diff --git a/llvm/include/llvm/CodeGen/LoopTraversal.h b/llvm/include/llvm/CodeGen/LoopTraversal.h
index e5810ef1ef26..93d140cabd0d 100644
--- a/llvm/include/llvm/CodeGen/LoopTraversal.h
+++ b/llvm/include/llvm/CodeGen/LoopTraversal.h
@@ -98,7 +98,7 @@ public:
bool Done = true)
: MBB(BB), PrimaryPass(Primary), IsDone(Done) {}
};
- LoopTraversal() {}
+ LoopTraversal() = default;
/// Identifies basic blocks that are part of loops and should be
/// visited twice and returns efficient traversal order for all the blocks.
diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
index 3f145ff224ad..fb276ff117af 100644
--- a/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -30,7 +30,7 @@ public:
typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
ErrorCallbackType;
- MIRFormatter() {}
+ MIRFormatter() = default;
virtual ~MIRFormatter() = default;
/// Implement target specific printing for machine operand immediate value, so
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 05a375bc251b..02eb5d24271d 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -392,7 +392,7 @@ struct FrameIndex {
bool IsFixed;
SMRange SourceRange;
- FrameIndex() {}
+ FrameIndex() = default;
FrameIndex(int FI, const llvm::MachineFrameInfo &MFI);
Expected<int> getFI(const llvm::MachineFrameInfo &MFI) const;
@@ -671,7 +671,7 @@ template <> struct MappingTraits<MachineFrameInfo> {
/// Targets should override this in a way that mirrors the implementation of
/// llvm::MachineFunctionInfo.
struct MachineFunctionInfo {
- virtual ~MachineFunctionInfo() {}
+ virtual ~MachineFunctionInfo() = default;
virtual void mappingImpl(IO &YamlIO) {}
};
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 5df468102a8a..864ca73180af 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -49,14 +49,13 @@ class CalleeSavedInfo {
/// The long-term solution is to model the liveness of callee-saved registers
/// by implicit uses on the return instructions, however, the required
/// changes in the ARM backend would be quite extensive.
- bool Restored;
+ bool Restored = true;
/// Flag indicating whether the register is spilled to stack or another
/// register.
- bool SpilledToReg;
+ bool SpilledToReg = false;
public:
- explicit CalleeSavedInfo(unsigned R, int FI = 0)
- : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
+ explicit CalleeSavedInfo(unsigned R, int FI = 0) : Reg(R), FrameIdx(FI) {}
// Accessors.
Register getReg() const { return Reg; }
@@ -180,14 +179,14 @@ private:
/// If true, the object has been sign-extended.
bool isSExt = false;
- uint8_t SSPLayout;
+ uint8_t SSPLayout = SSPLK_None;
StackObject(uint64_t Size, Align Alignment, int64_t SPOffset,
bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca,
bool IsAliased, uint8_t StackID = 0)
: SPOffset(SPOffset), Size(Size), Alignment(Alignment),
isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID),
- Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {}
+ Alloca(Alloca), isAliased(IsAliased) {}
};
/// The alignment of the stack.
diff --git a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
index 0bd0a31abcae..fc7635edd82c 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
@@ -22,7 +22,7 @@ class Module;
class MachineModuleSlotTracker : public ModuleSlotTracker {
const Function &TheFunction;
const MachineModuleInfo &TheMMI;
- unsigned MDNStartSlot, MDNEndSlot;
+ unsigned MDNStartSlot = 0, MDNEndSlot = 0;
void processMachineFunctionMetadata(AbstractSlotTrackerStorage *AST,
const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
index f17904d54cdd..eded28183ea2 100644
--- a/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -162,7 +162,7 @@ private:
/// ParentMI - This is the instruction that this operand is embedded into.
/// This is valid for all operand types, when the operand is in an instr.
- MachineInstr *ParentMI;
+ MachineInstr *ParentMI = nullptr;
/// Contents union - This contains the payload for the various operand types.
union ContentsUnion {
@@ -200,7 +200,7 @@ private:
} Contents;
explicit MachineOperand(MachineOperandType K)
- : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {
+ : OpKind(K), SubReg_TargetFlags(0) {
// Assert that the layout is what we expect. It's easy to grow this object.
static_assert(alignof(MachineOperand) <= alignof(int64_t),
"MachineOperand shouldn't be more than 8 byte aligned");
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 3e597e728fef..08b76295dbf2 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -124,7 +124,7 @@ public:
unsigned FunctionIdx, unsigned Flags)
: StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {}
- Candidate() {}
+ Candidate() = default;
/// Used to ensure that \p Candidates are outlined in an order that
/// preserves the start and end indices of other \p Candidates.
@@ -218,7 +218,7 @@ public:
C.Benefit = B;
}
- OutlinedFunction() {}
+ OutlinedFunction() = default;
};
} // namespace outliner
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index dbabfe5f0f32..94ae6fe02e9c 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -84,7 +84,7 @@ private:
/// The flag is true upon \p UpdatedCSRs initialization
/// and false otherwise.
- bool IsUpdatedCSRsInitialized;
+ bool IsUpdatedCSRsInitialized = false;
/// Contains the updated callee saved register list.
/// As opposed to the static list defined in register info,
diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
index d0fadd55d481..7c0ebe7191e4 100644
--- a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
+++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
@@ -16,6 +16,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
namespace llvm {
struct ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> {
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 94ba6ad91517..9cea197724cc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -46,8 +46,8 @@ public:
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
std::unique_ptr<SelectionDAGBuilder> SDB;
- AAResults *AA;
- GCFunctionInfo *GFI;
+ AAResults *AA = nullptr;
+ GCFunctionInfo *GFI = nullptr;
CodeGenOpt::Level OptLevel;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
@@ -199,7 +199,7 @@ public:
protected:
/// DAGSize - Size of DAG being instruction selected.
///
- unsigned DAGSize;
+ unsigned DAGSize = 0;
/// ReplaceUses - replace all uses of the old node F with the use
/// of the new node T.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index cd62c47abce9..04c6b50197d4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -741,11 +741,9 @@ public:
using reference = value_type &;
use_iterator() = default;
- use_iterator(const use_iterator &I) : Op(I.Op) {}
+ use_iterator(const use_iterator &I) = default;
- bool operator==(const use_iterator &x) const {
- return Op == x.Op;
- }
+ bool operator==(const use_iterator &x) const { return Op == x.Op; }
bool operator!=(const use_iterator &x) const {
return !operator==(x);
}
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index b2133de93ea2..e8d618a24f9b 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -319,7 +319,7 @@ class raw_ostream;
using IndexList = ilist<IndexListEntry>;
IndexList indexList;
- MachineFunction *mf;
+ MachineFunction *mf = nullptr;
using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>;
Mi2IndexMap mi2iMap;
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index bc22d7789856..47bedd9befc8 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -183,12 +183,12 @@ struct JumpTableHeader {
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
- bool FallthroughUnreachable;
+ bool FallthroughUnreachable = false;
JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
bool E = false)
: First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E), FallthroughUnreachable(false) {}
+ Emitted(E) {}
};
using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
@@ -218,14 +218,14 @@ struct BitTestBlock {
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
- bool FallthroughUnreachable;
+ bool FallthroughUnreachable = false;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {}
+ Cases(std::move(C)), Prob(Pr) {}
};
/// Return the range of values within a range.
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 7713dd0800c0..62365330379d 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -53,9 +53,9 @@ namespace ISD {
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
unsigned IsPointer : 1;
- unsigned ByValOrByRefSize; ///< Byval or byref struct size
+ unsigned ByValOrByRefSize = 0; ///< Byval or byref struct size
- unsigned PointerAddrSpace; ///< Address space of pointer argument
+ unsigned PointerAddrSpace = 0; ///< Address space of pointer argument
public:
ArgFlagsTy()
@@ -65,8 +65,7 @@ namespace ISD {
IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0),
IsSecArgPass(0), MemAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
- IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0),
- PointerAddrSpace(0) {
+ IsCopyElisionCandidate(0), IsPointer(0) {
static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bec191570594..3861648a5feb 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3485,13 +3485,19 @@ public:
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const;
+ /// Helper wrapper around SimplifyDemandedBits.
+ /// Adds Op back to the worklist upon success.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const;
+
/// More limited version of SimplifyDemandedBits that can be used to "look
/// through" ops that don't contribute to the DemandedBits/DemandedElts -
/// bitwise ops etc.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
SelectionDAG &DAG,
- unsigned Depth) const;
+ unsigned Depth = 0) const;
/// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
/// elements.
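
[Editor's note: a hedged sketch of the new wrapper inside a target DAG
combine; Op, N, NumElts, TLI, and DCI are assumed to come from the combiner.]

// Ask only for the low 16 bits of lane 0; on success the wrapper re-adds Op
// to the combiner worklist.
APInt DemandedBits = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 16);
APInt DemandedElts = APInt::getOneBitSet(NumElts, 0);
if (TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, DCI))
  return SDValue(N, 0);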
@@ -3676,11 +3682,11 @@ public:
/// Return if the N is a constant or constant vector equal to the true value
/// from getBooleanContents().
- bool isConstTrueVal(const SDNode *N) const;
+ bool isConstTrueVal(SDValue N) const;
/// Return if the N is a constant or constant vector equal to the false value
/// from getBooleanContents().
- bool isConstFalseVal(const SDNode *N) const;
+ bool isConstFalseVal(SDValue N) const;
/// Return if \p N is a True value when extended to \p VT.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h
index 4953d88340b1..42e8d294a637 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -39,10 +39,10 @@ class TargetInstrInfo;
};
private:
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineFunction *MF;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineFunction *MF = nullptr;
/// Virt2PhysMap - This is a virtual to physical register
/// mapping. Each virtual register is required to have an entry in
@@ -72,8 +72,7 @@ class TargetInstrInfo;
static char ID;
VirtRegMap()
- : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
- MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+ : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
VirtRegMap(const VirtRegMap &) = delete;
VirtRegMap &operator=(const VirtRegMap &) = delete;
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 9a5c6bcaf83f..fc8c59904cfb 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,6 +9,7 @@
#ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
#define LLVM_DWARFLINKER_DWARFSTREAMER_H
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/DWARFLinker/DWARFLinker.h"
@@ -48,7 +49,7 @@ public:
: OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
ErrorHandler(Error), WarningHandler(Warning) {}
- bool init(Triple TheTriple);
+ bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
/// Dump the file to the disk.
void finish();
@@ -85,6 +86,11 @@ public:
/// Emit the swift_ast section stored in \p Buffer.
void emitSwiftAST(StringRef Buffer);
+ /// Emit the swift reflection section stored in \p Buffer.
+ void emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size);
+
/// Emit debug_ranges for \p FuncRange by translating the
/// original \p Entries.
void emitRangesEntries(
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 536583e20640..8167aaaeffb5 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -535,7 +535,7 @@ public:
: Kind(K), IsDWARF64(IsDWARF64), Offset(Offset), Length(Length),
CFIs(CodeAlign, DataAlign, Arch) {}
- virtual ~FrameEntry() {}
+ virtual ~FrameEntry() = default;
FrameKind getKind() const { return Kind; }
uint64_t getOffset() const { return Offset; }
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
index 6dd90499c203..d920335d373e 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -20,7 +20,7 @@ namespace gsym {
/// string at offset zero. Strings must be NULL-terminated UTF-8 strings.
struct StringTable {
StringRef Data;
- StringTable() {}
+ StringTable() = default;
StringTable(StringRef D) : Data(D) {}
StringRef operator[](size_t Offset) const { return getString(Offset); }
StringRef getString(uint32_t Offset) const {
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index 779dc885372d..91748e15ba65 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -39,8 +39,8 @@ struct Request {
class DIPrinter {
public:
- DIPrinter() {}
- virtual ~DIPrinter() {}
+ DIPrinter() = default;
+ virtual ~DIPrinter() = default;
virtual void print(const Request &Request, const DILineInfo &Info) = 0;
virtual void print(const Request &Request, const DIInliningInfo &Info) = 0;
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 28545ed06836..760319544a02 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -1,15 +1,15 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
-//
+//===--- ItaniumDemangle.h -----------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Generic itanium demangler library. This file has two byte-per-byte identical
-// copies in the source tree, one in libcxxabi, and the other in llvm.
+// Generic itanium demangler library.
+// There are two copies of this file in the source tree. The one under
+// libcxxabi is the original and the one under llvm is the copy. Use
+// cp-to-llvm.sh to update the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Demangle/README.txt b/llvm/include/llvm/Demangle/README.txt
index 514ff6dd16f2..76470f61f959 100644
--- a/llvm/include/llvm/Demangle/README.txt
+++ b/llvm/include/llvm/Demangle/README.txt
@@ -4,41 +4,50 @@ Itanium Name Demangler Library
Introduction
------------
-This directory contains the generic itanium name demangler library. The main
-purpose of the library is to demangle C++ symbols, i.e. convert the string
-"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform
-some simple analysis on the mangled name, or (in LLVM) use the opaque
-ItaniumPartialDemangler to query the demangled AST.
+This directory contains the generic itanium name demangler
+library. The main purpose of the library is to demangle C++ symbols,
+i.e. convert the string "_Z1fv" into "f()". You can also use the CRTP
+base ManglingParser to perform some simple analysis on the mangled
+name, or (in LLVM) use the opaque ItaniumPartialDemangler to query the
+demangled AST.
Why are there multiple copies of this library in the source tree?
-----------------------------------------------------------------
-This directory is mirrored between libcxxabi/demangle and
-llvm/include/llvm/Demangle. The simple reason for this is that both projects
-need to demangle symbols, but neither can depend on each other. libcxxabi needs
-the demangler to implement __cxa_demangle, which is part of the itanium ABI
-spec. LLVM needs a copy for a bunch of places, but doesn't want to use the
-system's __cxa_demangle because it a) might not be available (i.e., on Windows),
-and b) probably isn't that up-to-date on the latest language features.
-
-The copy of the demangler in LLVM has some extra stuff that aren't needed in
-libcxxabi (ie, the MSVC demangler, ItaniumPartialDemangler), which depend on the
-shared generic components. Despite these differences, we want to keep the "core"
-generic demangling library identical between both copies to simplify development
-and testing.
-
-If you're working on the generic library, then do the work first in libcxxabi,
-then run the cp-to-llvm.sh script in src/demangle. This script takes as an
-argument the path to llvm, and re-copies the changes you made to libcxxabi over.
-Note that this script just blindly overwrites all changes to the generic library
-in llvm, so be careful.
-
-Because the core demangler needs to work in libcxxabi, everything needs to be
-declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't
-introduce any code that depends on the libcxx dylib.
-
-Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have
-both LLVM and libcxxabi depend on a shared demangler library.
+The canonical sources are in libcxxabi/src/demangle and some of the
+files are copied to llvm/include/llvm/Demangle. The simple reason for
+this dates back to before the monorepo: both [sub]projects need to
+demangle symbols, but neither can depend on the other.
+
+* libcxxabi needs the demangler to implement __cxa_demangle, which is
+ part of the itanium ABI spec.
+
+* LLVM needs a copy for a bunch of places, and cannot rely on the
+ system's __cxa_demangle because it a) might not be available (i.e.,
+ on Windows), and b) may not be up-to-date on the latest language
+ features.
+
+The copy of the demangler in LLVM has some extra stuff that isn't
+needed in libcxxabi (i.e., the MSVC demangler and
+ItaniumPartialDemangler), which depends on the shared generic
+components. Despite these differences, we want to keep the "core"
+generic demangling library identical between both copies to simplify
+development and testing.
+
+If you're working on the generic library, then do the work first in
+libcxxabi, then run the cp-to-llvm.sh script in src/demangle. This
+script takes as an optional argument the path to llvm, and copies the
+changes you made to libcxxabi over. Note that this script just
+blindly overwrites all changes to the generic library in llvm, so be
+careful.
+
+Because the core demangler needs to work in libcxxabi, everything
+needs to be declared in an anonymous namespace (see
+DEMANGLE_NAMESPACE_BEGIN), and you can't introduce any code that
+depends on the libcxx dylib.
+
+FIXME: Now that LLVM is a monorepo, it should be possible to
+de-duplicate this code, and have both LLVM and libcxxabi depend on a
+shared demangler library.
Testing
-------
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 323282f69c26..6bbb8837fed1 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -1,7 +1,5 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===--- StringView.h -------------------------------------------*- C++ -*-===//
-//
+//===--- StringView.h ----------------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -9,6 +7,9 @@
//===----------------------------------------------------------------------===//
//
// FIXME: Use std::string_view instead when we support C++17.
+// There are two copies of this file in the source tree. The one under
+// libcxxabi is the original and the one under llvm is the copy. Use
+// cp-to-llvm.sh to update the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index bec019da8680..1cf7e8f1df45 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -1,14 +1,15 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===--- Utility.h ----------------------------------------------*- C++ -*-===//
-//
+//===--- Utility.h -------------------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Provide some utility classes for use in the demangler(s).
+// Provide some utility classes for use in the demangler.
+// There are two copies of this file in the source tree. The one in libcxxabi
+// is the original and the one in llvm is the copy. Use cp-to-llvm.sh to update
+// the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index ddbb3e76f145..25f1349f15f2 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1636,7 +1636,7 @@ using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
/// or an error if resolution failed.
class JITLinkAsyncLookupContinuation {
public:
- virtual ~JITLinkAsyncLookupContinuation() {}
+ virtual ~JITLinkAsyncLookupContinuation() = default;
virtual void run(Expected<AsyncLookupResult> LR) = 0;
private:
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index d0168f79e3d8..c4647148f287 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -686,7 +686,7 @@ public:
MaterializationUnit(Interface I)
: SymbolFlags(std::move(I.SymbolFlags)),
InitSymbol(std::move(I.InitSymbol)) {}
- virtual ~MaterializationUnit() {}
+ virtual ~MaterializationUnit() = default;
/// Return the name of this materialization unit. Useful for debugging
/// output.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
index d2bf8330695f..253b1c876782 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
@@ -29,7 +29,7 @@ class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin {
public:
class DebugSectionSynthesizer {
public:
- virtual ~DebugSectionSynthesizer() {}
+ virtual ~DebugSectionSynthesizer() = default;
virtual Error startSynthesis() = 0;
virtual Error completeSynthesisAndRegister() = 0;
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index 940d0d28ae83..ac7051b5b75c 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -34,7 +34,7 @@ class ExecutionSession;
class DebugObjectRegistrar {
public:
virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0;
- virtual ~DebugObjectRegistrar() {}
+ virtual ~DebugObjectRegistrar() = default;
};
/// Use ExecutorProcessControl to register debug objects locally or in a remote
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
index c57264e59655..8c287f9fec0e 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
@@ -35,7 +35,7 @@ class Task : public RTTIExtends<Task, RTTIRoot> {
public:
static char ID;
- virtual ~Task() {}
+ virtual ~Task() = default;
/// Description of the task to be performed. Used for logging.
virtual void printDescription(raw_ostream &OS) = 0;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 2178acc90e2c..bee90281e086 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -113,6 +113,9 @@ enum class AddressSpace : unsigned {
Local = 5,
};
+/// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t.
+enum class OMPInteropType { Unknown, Target, TargetSync };
+
} // end namespace omp
} // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 85dd28ec3159..f60debe8411c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1003,6 +1003,55 @@ public:
llvm::ConstantInt *Size,
const llvm::Twine &Name = Twine(""));
+ /// Create a runtime call for __tgt_interop_init
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+ /// \param InteropType type of interop operation
+  /// \param Device device to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_init call
+ CallInst *createOMPInteropInit(const LocationDescription &Loc,
+ Value *InteropVar,
+ omp::OMPInteropType InteropType, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause);
+
+ /// Create a runtime call for __tgt_interop_destroy
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+  /// \param Device device to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_destroy call
+ CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause);
+
+ /// Create a runtime call for __tgt_interop_use
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+ /// \param Device devide to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_use call
+ CallInst *createOMPInteropUse(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences, Value *DependenceAddress,
+ bool HaveNowaitClause);
+
/// The `omp target` interface
///
/// For more information about the usage of this interface,
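
[Editor's note: a hedged sketch of driving the new interop entry points;
OMPBuilder, Builder (an IRBuilder), Loc, InteropVar, and Device are assumed
to exist in the caller.]

Value *NoDeps = Builder.getInt32(0);
Value *NoDepAddr = Constant::getNullValue(Builder.getInt8PtrTy());
CallInst *Init = OMPBuilder.createOMPInteropInit(
    Loc, InteropVar, omp::OMPInteropType::TargetSync, Device, NoDeps,
    NoDepAddr, /*HaveNowaitClause=*/false);
CallInst *Destroy = OMPBuilder.createOMPInteropDestroy(
    Loc, InteropVar, Device, NoDeps, NoDepAddr, /*HaveNowaitClause=*/false);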
@@ -1167,6 +1216,7 @@ private:
///
/// \param AllocIP Instruction to create AllocaInst before.
/// \param X The target atomic pointer to be updated
+ /// \param XElemTy The element type of the atomic pointer.
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic
/// instructions.
@@ -1183,12 +1233,11 @@ private:
///
/// \returns A pair of the old value of X before the update, and the value
/// used for the update.
- std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X,
- Value *Expr, AtomicOrdering AO,
- AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp,
- bool VolatileX,
- bool IsXBinopExpr);
+ std::pair<Value *, Value *>
+ emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+ AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
+ bool IsXBinopExpr);
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
@@ -1200,6 +1249,7 @@ public:
/// a struct to pack relevant information while generating atomic Ops
struct AtomicOpValue {
Value *Var = nullptr;
+ Type *ElemTy = nullptr;
bool IsSigned = false;
bool IsVolatile = false;
};
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index d2b70edd4d87..0c3cb3f43105 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -386,6 +386,13 @@ __OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy,
VoidPtr)
__OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
+__OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int64,
+ Int32, Int32, VoidPtr, Int32)
+__OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
+ Int32, Int32, VoidPtr, Int32)
+__OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
+ Int32, VoidPtr, Int32)
+
__OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
/* Int */ Int32, /* omp_memespace_handle_t */ VoidPtr,
/* Int */ Int32, /* omp_alloctrait_t */ VoidPtr)
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index 31df4c75b6e7..69048554a05c 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -14,11 +14,11 @@
#ifndef LLVM_IR_ABSTRACTCALLSITE_H
#define LLVM_IR_ABSTRACTCALLSITE_H
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include <cassert>
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 5e2cfe6d81ac..74b60f1e3d05 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h
index b872e2626981..0ee584f8af7e 100644
--- a/llvm/include/llvm/IR/CFG.h
+++ b/llvm/include/llvm/IR/CFG.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -31,7 +32,6 @@
namespace llvm {
-class BasicBlock;
class Instruction;
class Use;
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index f36c9e620d43..fc461fc3f49f 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index ba2568042c41..96569179060f 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -33,7 +33,6 @@
#include <cstddef>
#include <cstdint>
#include <iterator>
-#include <type_traits>
#include <vector>
// Helper macros for defining get() overrides.
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 73b0be43e136..1ea1d9787d61 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -15,14 +15,16 @@
#define LLVM_IR_DIAGNOSTICINFO_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TypeSize.h"
-#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cstdint>
#include <functional>
@@ -33,13 +35,15 @@ namespace llvm {
// Forward declarations.
class DiagnosticPrinter;
+class DIFile;
+class DISubprogram;
class CallInst;
class Function;
class Instruction;
class InstructionCost;
-class LLVMContext;
class Module;
-class SMDiagnostic;
+class Type;
+class Value;
/// Defines the different supported severity of a diagnostic.
enum DiagnosticSeverity : char {
@@ -1049,18 +1053,20 @@ static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) {
/// Diagnostic information for SMDiagnostic reporting.
class DiagnosticInfoSrcMgr : public DiagnosticInfo {
const SMDiagnostic &Diagnostic;
+ StringRef ModName;
// For inlineasm !srcloc translation.
bool InlineAsmDiag;
unsigned LocCookie;
public:
- DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic,
+ DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, StringRef ModName,
bool InlineAsmDiag = true, unsigned LocCookie = 0)
: DiagnosticInfo(DK_SrcMgr, getDiagnosticSeverity(Diagnostic.getKind())),
- Diagnostic(Diagnostic), InlineAsmDiag(InlineAsmDiag),
+ Diagnostic(Diagnostic), ModName(ModName), InlineAsmDiag(InlineAsmDiag),
LocCookie(LocCookie) {}
+ StringRef getModuleName() const { return ModName; }
bool isInlineAsmDiag() const { return InlineAsmDiag; }
const SMDiagnostic &getSMDiag() const { return Diagnostic; }
unsigned getLocCookie() const { return LocCookie; }
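
An emission-site sketch showing the new module name argument; `Ctx`,
`SMDiag`, and `M` are hypothetical (not part of this patch):

  // Sketch: thread the owning module's name through to the handler.
  Ctx.diagnose(llvm::DiagnosticInfoSrcMgr(SMDiag, M.getName(),
                                          /*InlineAsmDiag=*/true,
                                          /*LocCookie=*/0));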
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index 475355af5647..d13a5856df3b 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -14,23 +14,34 @@
#ifndef LLVM_IR_DOMINATORS_H
#define LLVM_IR_DOMINATORS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CFGDiff.h"
+#include "llvm/Support/CFGUpdate.h"
#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include <utility>
+#include <vector>
namespace llvm {
class Function;
class Instruction;
class Module;
+class Value;
class raw_ostream;
+template <class GraphType> struct GraphTraits;
extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 53f517480ca1..a1789759960d 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -28,12 +28,13 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
@@ -52,7 +52,6 @@
namespace llvm {
class APInt;
-class MDNode;
class Use;
/// This provides the default implementation of the IRBuilder
diff --git a/llvm/include/llvm/IR/IRPrintingPasses.h b/llvm/include/llvm/IR/IRPrintingPasses.h
index 2e62be7cd1ec..3fba5b81e37a 100644
--- a/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -24,6 +24,11 @@
namespace llvm {
class raw_ostream;
class StringRef;
+class Function;
+class FunctionPass;
+class Module;
+class ModulePass;
+class Pass;
/// Create and return a pass that writes the module to the specified
/// \c raw_ostream.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index b3d2a2c8ed9d..589926c0faf1 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1393,10 +1393,13 @@ public:
const Use &getCalledOperandUse() const { return Op<CalledOperandOpEndIdx>(); }
Use &getCalledOperandUse() { return Op<CalledOperandOpEndIdx>(); }
- /// Returns the function called, or null if this is an
- /// indirect function invocation.
+ /// Returns the function called, or null if this is an indirect function
+ /// invocation or the function signature does not match the call signature.
Function *getCalledFunction() const {
- return dyn_cast_or_null<Function>(getCalledOperand());
+ if (auto *F = dyn_cast_or_null<Function>(getCalledOperand()))
+ if (F->getValueType() == getFunctionType())
+ return F;
+ return nullptr;
}
/// Return true if the callsite is an indirect call.
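
One way to read the stricter check, as a sketch with a hypothetical
`llvm::CallBase *CB` (not part of this patch):

  // Sketch: F is non-null only when the callee type matches the call type.
  if (llvm::Function *F = CB->getCalledFunction()) {
    // Direct call whose signature matches CB->getFunctionType().
  } else {
    // Indirect call, or a direct call with a mismatched signature.
  }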
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 9878082ffffa..1937ffd36f7b 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -25,8 +25,6 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
-#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <utility>
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 84ebb461ebef..5929cff3b4fb 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -27,11 +27,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/OperandTraits.h"
@@ -52,7 +50,6 @@ namespace llvm {
class APInt;
class ConstantInt;
class DataLayout;
-class LLVMContext;
//===----------------------------------------------------------------------===//
// AllocaInst Class
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index f4e571e86493..01dada25a285 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1194,6 +1194,17 @@ public:
ConstantInt *getIndex() const;
};
+/// This represents the llvm.instrprof.cover intrinsic.
+class InstrProfCoverInst : public InstrProfInstBase {
+public:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::instrprof_cover;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.instrprof.increment intrinsic.
class InstrProfIncrementInst : public InstrProfInstBase {
public:
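
A dispatch sketch for the new intrinsic class, assuming a hypothetical
`llvm::Instruction &I` (not part of this patch):

  // Sketch: the classof overloads make the usual casting API work.
  if (auto *Cover = llvm::dyn_cast<llvm::InstrProfCoverInst>(&I)) {
    // getName() is assumed available from the InstrProfInstBase base class.
    llvm::GlobalVariable *NameVar = Cover->getName();
  }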
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 3e40bbf39dd4..f5248e82ad21 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -582,6 +582,10 @@ def int_experimental_noalias_scope_decl
def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>;
+// A cover for instrumentation based profiling.
+def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
// A counter increment for instrumentation based profiling.
def int_instrprof_increment : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index e610c28a5923..a65ddff07a29 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -897,6 +897,14 @@ def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
}
+//===----------------------------------------------------------------------===//
+// Memory Operations (MOPS) Intrinsics
+let TargetPrefix = "aarch64" in {
+ // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
+ def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
+}
+
// Transactional Memory Extension (TME) Intrinsics
let TargetPrefix = "aarch64" in {
def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index d165a405ce22..446bcecf1c64 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -36,7 +36,6 @@ template <typename T> class StringMapEntry;
class StringRef;
class Twine;
class LLVMRemarkStreamer;
-class raw_ostream;
namespace remarks {
class RemarkStreamer;
diff --git a/llvm/include/llvm/IR/LLVMRemarkStreamer.h b/llvm/include/llvm/IR/LLVMRemarkStreamer.h
index e7627e993370..094ead273eed 100644
--- a/llvm/include/llvm/IR/LLVMRemarkStreamer.h
+++ b/llvm/include/llvm/IR/LLVMRemarkStreamer.h
@@ -14,14 +14,20 @@
#ifndef LLVM_IR_LLVMREMARKSTREAMER_H
#define LLVM_IR_LLVMREMARKSTREAMER_H
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/Remarks/RemarkStreamer.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ToolOutputFile.h"
#include <memory>
#include <string>
namespace llvm {
+
+class DiagnosticInfoOptimizationBase;
+class LLVMContext;
+class ToolOutputFile;
+namespace remarks {
+class RemarkStreamer;
+}
+
/// Streamer for LLVM remarks which has logic for dealing with DiagnosticInfo
/// objects.
class LLVMRemarkStreamer {
diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h
index 2459f0a5450a..b3a4820ba0e4 100644
--- a/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/llvm/include/llvm/IR/LegacyPassManager.h
@@ -16,11 +16,11 @@
#ifndef LLVM_IR_LEGACYPASSMANAGER_H
#define LLVM_IR_LEGACYPASSMANAGER_H
-#include "llvm/Pass.h"
#include "llvm/Support/CBindingWrapping.h"
namespace llvm {
+class Function;
class Pass;
class Module;
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index 51be8667f1c1..42829388b79a 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
@@ -28,6 +27,7 @@ template <typename T> class ArrayRef;
class LLVMContext;
class Constant;
class ConstantAsMetadata;
+class Function;
class MDNode;
class MDString;
class Metadata;
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 26d70b4db2d5..7965884990e5 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -20,9 +20,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
@@ -46,6 +44,8 @@ namespace llvm {
class Module;
class ModuleSlotTracker;
class raw_ostream;
+template <typename T> class StringMapEntry;
+template <typename ValueTy> class StringMapEntryStorage;
class Type;
enum LLVMConstants : uint32_t {
@@ -682,6 +682,10 @@ struct AAMDNodes {
// Shift tbaa.struct Metadata node to start off bytes later
static MDNode *shiftTBAAStruct(MDNode *M, size_t off);
+ // Extend tbaa Metadata node to apply to a series of bytes of length len.
+ // A size of -1 denotes an unknown size.
+ static MDNode *extendToTBAA(MDNode *TBAA, ssize_t len);
+
/// Given two sets of AAMDNodes that apply to the same pointer,
/// give the best AAMDNodes that are compatible with both (i.e. a set of
/// nodes whose allowable aliasing conclusions are a subset of those
@@ -708,6 +712,21 @@ struct AAMDNodes {
return Result;
}
+ /// Create a new AAMDNode that describes this AAMDNode after extending it to
+ /// apply to a series of bytes of length Len. A size of -1 denotes an unknown
+ /// size.
+ AAMDNodes extendTo(ssize_t Len) const {
+ AAMDNodes Result;
+ Result.TBAA = TBAA ? extendToTBAA(TBAA, Len) : nullptr;
+ // tbaa.struct contains (offset, size, type) triples. Extending the length
+ // of the tbaa.struct doesn't require changing this (though more information
+ // could be provided by adding more triples at subsequent lengths).
+ Result.TBAAStruct = TBAAStruct;
+ Result.Scope = Scope;
+ Result.NoAlias = NoAlias;
+ return Result;
+ }
+
/// Given two sets of AAMDNodes applying to potentially different locations,
/// determine the best AAMDNodes that apply to both.
AAMDNodes merge(const AAMDNodes &Other) const;
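
A sketch of the intended use when widening a memory access; `AATags`,
`NewLen`, and `NewStore` are hypothetical (not part of this patch):

  // Sketch: stretch the TBAA info to cover NewLen bytes (-1 if unknown).
  llvm::AAMDNodes Wide = AATags.extendTo(NewLen);
  NewStore->setAAMetadata(Wide);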
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index ec1d5ef79eed..b76bc879fb45 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h
index 8e81f30b2289..27dd075bbdb2 100644
--- a/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/llvm/include/llvm/IR/PassInstrumentation.h
@@ -86,7 +86,7 @@ public:
using AnalysesClearedFunc = void(StringRef);
public:
- PassInstrumentationCallbacks() {}
+ PassInstrumentationCallbacks() = default;
/// Copying PassInstrumentationCallbacks is not intended.
PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete;
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index e88d2233daba..12f9052a9edd 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -46,11 +46,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManagerInternal.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/TypeName.h"
-#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>
@@ -473,7 +470,7 @@ class PassManager : public PassInfoMixin<
PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...>> {
public:
/// Construct a pass manager.
- explicit PassManager() {}
+ explicit PassManager() = default;
// FIXME: These are equivalent to the default move constructor/move
// assignment. However, using = default triggers linker errors due to the
diff --git a/llvm/include/llvm/IR/PassManagerImpl.h b/llvm/include/llvm/IR/PassManagerImpl.h
index bb4fbe98b082..3c94cf2811f6 100644
--- a/llvm/include/llvm/IR/PassManagerImpl.h
+++ b/llvm/include/llvm/IR/PassManagerImpl.h
@@ -20,7 +20,7 @@
namespace llvm {
template <typename IRUnitT, typename... ExtraArgTs>
-inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() {}
+inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() = default;
template <typename IRUnitT, typename... ExtraArgTs>
inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager(
diff --git a/llvm/include/llvm/IR/PassTimingInfo.h b/llvm/include/llvm/IR/PassTimingInfo.h
index e44321b4af66..49a83605c47a 100644
--- a/llvm/include/llvm/IR/PassTimingInfo.h
+++ b/llvm/include/llvm/IR/PassTimingInfo.h
@@ -15,8 +15,6 @@
#ifndef LLVM_IR_PASSTIMINGINFO_H
#define LLVM_IR_PASSTIMINGINFO_H
-#include "llvm/ADT/Any.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h
index 5ad1d0a6f920..1d6b10d9a78b 100644
--- a/llvm/include/llvm/IR/ReplaceConstant.h
+++ b/llvm/include/llvm/IR/ReplaceConstant.h
@@ -14,13 +14,16 @@
#ifndef LLVM_IR_REPLACECONSTANT_H
#define LLVM_IR_REPLACECONSTANT_H
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instruction.h"
#include <map>
#include <vector>
namespace llvm {
+class ConstantExpr;
+class Instruction;
+class Use;
+template <typename PtrType> class SmallPtrSetImpl;
+
/// The given instruction \p I contains given constant expression \p CE as one
/// of its operands, possibly nested within constant expression trees. Convert
/// all reachable paths from constant expression operands of \p I to \p CE into
diff --git a/llvm/include/llvm/IR/SSAContext.h b/llvm/include/llvm/IR/SSAContext.h
index 8879512610c2..8ca23e3ee077 100644
--- a/llvm/include/llvm/IR/SSAContext.h
+++ b/llvm/include/llvm/IR/SSAContext.h
@@ -15,18 +15,15 @@
#ifndef LLVM_IR_SSACONTEXT_H
#define LLVM_IR_SSACONTEXT_H
-#include "llvm/ADT/GenericSSAContext.h"
-#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/Printable.h"
-#include <memory>
-
namespace llvm {
class BasicBlock;
class Function;
class Instruction;
class Value;
template <typename, bool> class DominatorTreeBase;
+template <typename _FunctionT> class GenericSSAContext;
template <> class GenericSSAContext<Function> {
Function *F;
diff --git a/llvm/include/llvm/IR/SafepointIRVerifier.h b/llvm/include/llvm/IR/SafepointIRVerifier.h
index 76b147e690be..246d236adb38 100644
--- a/llvm/include/llvm/IR/SafepointIRVerifier.h
+++ b/llvm/include/llvm/IR/SafepointIRVerifier.h
@@ -37,7 +37,7 @@ FunctionPass *createSafepointIRVerifierPass();
class SafepointIRVerifierPass : public PassInfoMixin<SafepointIRVerifierPass> {
public:
- explicit SafepointIRVerifierPass() {}
+ explicit SafepointIRVerifierPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index a254a67e6b1f..da9c732ad818 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -19,10 +19,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -204,11 +203,6 @@ public:
/// For example this could happen due to relocations on unwinding
/// path of invoke.
inline std::vector<const GCRelocateInst *> getGCRelocates() const;
-
- /// Returns pair of boolean flags. The first one is true is there is
- /// a gc.result intrinsic in the same block as statepoint. The second flag
- /// is true if there is an intrinsic outside of the block with statepoint.
- inline std::pair<bool, bool> getGCResultLocality() const;
};
std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const {
@@ -236,18 +230,6 @@ std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const {
return Result;
}
-std::pair<bool, bool> GCStatepointInst::getGCResultLocality() const {
- std::pair<bool, bool> Res(false, false);
- for (auto *U : users())
- if (auto *GRI = dyn_cast<GCResultInst>(U)) {
- if (GRI->getParent() == this->getParent())
- Res.first = true;
- else
- Res.second = true;
- }
- return Res;
-}
-
/// Call sites that get wrapped by a gc.statepoint (currently only in
/// RewriteStatepointsForGC and potentially in other passes in the future) can
/// have attributes that describe properties of gc.statepoint call they will be
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 98c97375ad7b..e4e8a5529c87 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -15,7 +15,6 @@
#define LLVM_IR_TYPE_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
@@ -33,6 +32,7 @@ class LLVMContext;
class PointerType;
class raw_ostream;
class StringRef;
+template <typename PtrType> class SmallPtrSetImpl;
/// The instances of the Type class are immutable: once they are created,
/// they are never changed. Also note that only one instance of a particular
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index 917db2679c55..64b86f3a4396 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -25,7 +25,6 @@
#define LLVM_IR_USE_H
#include "llvm-c/Types.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Compiler.h"
diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h
index 5b16b8304692..8c3cd171b1a2 100644
--- a/llvm/include/llvm/InterfaceStub/IFSStub.h
+++ b/llvm/include/llvm/InterfaceStub/IFSStub.h
@@ -95,7 +95,7 @@ struct IFSStub {
std::vector<std::string> NeededLibs;
std::vector<IFSSymbol> Symbols;
- IFSStub() {}
+ IFSStub() = default;
IFSStub(const IFSStub &Stub);
IFSStub(IFSStub &&Stub);
};
@@ -106,7 +106,7 @@ struct IFSStub {
// This class makes it possible to map a second traits so the same data
// structure can be used for 2 different yaml schema.
struct IFSStubTriple : IFSStub {
- IFSStubTriple() {}
+ IFSStubTriple() = default;
IFSStubTriple(const IFSStub &Stub);
IFSStubTriple(const IFSStubTriple &Stub);
IFSStubTriple(IFSStubTriple &&Stub);
diff --git a/llvm/include/llvm/LineEditor/LineEditor.h b/llvm/include/llvm/LineEditor/LineEditor.h
index 0beaf1bb23a9..9f4ea5bee139 100644
--- a/llvm/include/llvm/LineEditor/LineEditor.h
+++ b/llvm/include/llvm/LineEditor/LineEditor.h
@@ -64,7 +64,7 @@ public:
/// A possible completion at a given cursor position.
struct Completion {
- Completion() {}
+ Completion() = default;
Completion(const std::string &TypedText, const std::string &DisplayText)
: TypedText(TypedText), DisplayText(DisplayText) {}
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 88d86d5b675a..d2307d692278 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,6 +80,10 @@ namespace llvm {
private:
Environment Env;
+ /// The name of the Segment where Swift5 Reflection Section data will be
+ /// emitted.
+ StringRef Swift5ReflectionSegmentName;
+
/// The triple for this object.
Triple TT;
@@ -399,13 +403,17 @@ namespace llvm {
const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
const SourceMgr *Mgr = nullptr,
MCTargetOptions const *TargetOpts = nullptr,
- bool DoAutoReset = true);
+ bool DoAutoReset = true,
+ StringRef Swift5ReflSegmentName = {});
MCContext(const MCContext &) = delete;
MCContext &operator=(const MCContext &) = delete;
~MCContext();
Environment getObjectFileType() const { return Env; }
+ const StringRef &getSwift5ReflectionSegmentName() const {
+ return Swift5ReflectionSegmentName;
+ }
const Triple &getTargetTriple() const { return TT; }
const SourceMgr *getSourceManager() const { return SrcMgr; }
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5e0cccaba77f..3c1d10c4e62f 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/VersionTuple.h"
@@ -228,6 +229,10 @@ protected:
MCSection *ReadOnly8Section = nullptr;
MCSection *ReadOnly16Section = nullptr;
+ // Swift5 Reflection Data Sections
+ std::array<MCSection *, binaryformat::Swift5ReflectionSectionKind::last>
+ Swift5ReflectionSections = {};
+
public:
void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel = false);
@@ -423,6 +428,15 @@ public:
bool isPositionIndependent() const { return PositionIndependent; }
+ // Swift5 Reflection Data Sections
+ MCSection *getSwift5ReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind) {
+ return ReflSectionKind !=
+ llvm::binaryformat::Swift5ReflectionSectionKind::unknown
+ ? Swift5ReflectionSections[ReflSectionKind]
+ : nullptr;
+ }
+
private:
bool PositionIndependent = false;
MCContext *Ctx = nullptr;
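
A lookup sketch for the new accessor, with a hypothetical
`llvm::MCObjectFileInfo *MOFI`; `fieldmd` stands in for one of the kinds
assumed to be defined in BinaryFormat/Swift.def (not part of this patch):

  // Sketch: the accessor returns nullptr for the 'unknown' kind.
  using llvm::binaryformat::Swift5ReflectionSectionKind;
  llvm::MCSection *Sec =
      MOFI->getSwift5ReflectionSection(Swift5ReflectionSectionKind::fieldmd);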
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 17b7446baae8..9ff68f4236ca 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -268,7 +268,7 @@ public:
// Used for decoding
uint32_t ChildrenToProcess = 0;
- MCDecodedPseudoProbeInlineTree(){};
+ MCDecodedPseudoProbeInlineTree() = default;
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
// Return false if it's a dummy inline site
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index 5b993c6a5345..c4be5312ea19 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -41,7 +41,7 @@ public:
InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
: STI(STI), MCII(MCII) {}
- virtual ~InstrPostProcess() {}
+ virtual ~InstrPostProcess() = default;
/// This method can be overriden by targets to modify the mca::Instruction
/// object after it has been lowered from the MCInst.
diff --git a/llvm/include/llvm/MCA/HWEventListener.h b/llvm/include/llvm/MCA/HWEventListener.h
index 5b5b83cccd9c..8298e0705d33 100644
--- a/llvm/include/llvm/MCA/HWEventListener.h
+++ b/llvm/include/llvm/MCA/HWEventListener.h
@@ -176,7 +176,7 @@ public:
virtual void onReleasedBuffers(const InstRef &Inst,
ArrayRef<unsigned> Buffers) {}
- virtual ~HWEventListener() {}
+ virtual ~HWEventListener() = default;
private:
virtual void anchor();
diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 7467fd6754f0..1c909b01a390 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -49,7 +49,7 @@ class ResourceStrategy {
ResourceStrategy &operator=(const ResourceStrategy &) = delete;
public:
- ResourceStrategy() {}
+ ResourceStrategy() = default;
virtual ~ResourceStrategy();
/// Selects a processor resource unit from a ReadyMask.
diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index 5a5fc90f18bd..b792cbc3d9ac 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -45,7 +45,7 @@ protected:
public:
friend class Archive;
virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
- virtual ~AbstractArchiveMemberHeader(){};
+ virtual ~AbstractArchiveMemberHeader() = default;
/// Get the name without looking up long names.
virtual Expected<StringRef> getRawName() const = 0;
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index e59a63d93989..c674b80c814d 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -699,7 +699,7 @@ private:
}
}
- Elf_Note_Iterator_Impl() {}
+ Elf_Note_Iterator_Impl() = default;
explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {}
Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err)
: RemainingSize(Size), Err(&Err) {
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index ede742c47f97..49a0706b84be 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -583,6 +584,9 @@ public:
StringRef mapDebugSectionName(StringRef Name) const override;
+ llvm::binaryformat::Swift5ReflectionSectionKind
+ mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
+
bool hasPageZeroSegment() const { return HasPageZeroSegment; }
static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 12704b1fc88e..950c38a599d5 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/SymbolicFile.h"
@@ -290,6 +291,11 @@ protected:
virtual void getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const = 0;
+ virtual llvm::binaryformat::Swift5ReflectionSectionKind
+ mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
+ return llvm::binaryformat::Swift5ReflectionSectionKind::unknown;
+ }
+
Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
public:
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 9eb754a4d824..561cd54fa998 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -75,7 +75,7 @@ private:
class OptBisectInstrumentation {
public:
- OptBisectInstrumentation() {}
+ OptBisectInstrumentation() = default;
void registerCallbacks(PassInstrumentationCallbacks &PIC);
};
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 4d3bb0e8ff10..a416eb28906e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -16,6 +16,7 @@
#define LLVM_PROFILEDATA_INSTRPROF_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
@@ -277,6 +278,18 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const Function &F, const Module &M);
+/// An enum describing the attributes of an instrumented profile.
+enum class InstrProfKind {
+ Unknown = 0x0,
+ FE = 0x1, // A Clang frontend profile, incompatible with other attrs.
+ IR = 0x2, // An IR-level profile (default when -fprofile-generate is used).
+ BB = 0x4, // A profile with entry basic block instrumentation.
+ CS = 0x8, // A context sensitive IR-level profile.
+ SingleByteCoverage = 0x10, // Use single byte probes for coverage.
+ FunctionEntryOnly = 0x20, // Only instrument the function entry basic block.
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly)
+};
+
const std::error_category &instrprof_category();
enum class instrprof_error {
@@ -1155,12 +1168,6 @@ struct Header {
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
int64_t &RangeLast);
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled,
- bool DebugInfoCorrelate);
-
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
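
Because InstrProfKind is declared with LLVM_MARK_AS_BITMASK_ENUM, kinds
compose with the usual bitwise operators; a small sketch (not part of this
patch):

  // Sketch: combine and test attribute bits.
  llvm::InstrProfKind K = llvm::InstrProfKind::IR | llvm::InstrProfKind::CS;
  bool IsCS = static_cast<bool>(K & llvm::InstrProfKind::CS); // true
  K |= llvm::InstrProfKind::SingleByteCoverage;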
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index 135936b99f24..3d0076fd9035 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -55,7 +55,7 @@ public:
enum InstrProfCorrelatorKind { CK_32Bit, CK_64Bit };
InstrProfCorrelatorKind getKind() const { return Kind; }
- virtual ~InstrProfCorrelator() {}
+ virtual ~InstrProfCorrelator() = default;
protected:
struct Context {
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 0544b6b2ef71..62054a6a3df5 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* generated profile, and 0 if this is a Clang FE generated profile.
* 1 in bit 57 indicates there are context-sensitive records in the profile.
* The 59th bit indicates whether to use debug info to correlate profiles.
+ * The 60th bit indicates single byte coverage instrumentation.
+ * The 61st bit indicates function entry instrumentation only.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
+#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
+#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 1326cbf0e1ce..e9dd19a69792 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -100,6 +100,16 @@ public:
/// Return true if we must provide debug info to create PGO profiles.
virtual bool useDebugInfoCorrelate() const { return false; }
+ /// Return true if the profile has single byte counters representing coverage.
+ virtual bool hasSingleByteCoverage() const = 0;
+
+ /// Return true if the profile only instruments function entries.
+ virtual bool functionEntryOnly() const = 0;
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the
+ /// profile. To check individual attributes, prefer using the helpers above.
+ virtual InstrProfKind getProfileKind() const = 0;
+
/// Return the PGO symtab. There are three different readers:
/// Raw, Text, and Indexed profile readers. The first two types
/// of readers are used only by llvm-profdata tool, while the indexed
@@ -176,9 +186,8 @@ private:
std::unique_ptr<MemoryBuffer> DataBuffer;
/// Iterator over the profile data.
line_iterator Line;
- bool IsIRLevelProfile = false;
- bool HasCSIRLevelProfile = false;
- bool InstrEntryBBEnabled = false;
+ /// The attributes of the current profile.
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
Error readValueProfileData(InstrProfRecord &Record);
@@ -191,11 +200,27 @@ public:
/// Return true if the given buffer is in text instrprof format.
static bool hasFormat(const MemoryBuffer &Buffer);
- bool isIRLevelProfile() const override { return IsIRLevelProfile; }
+ bool isIRLevelProfile() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::IR);
+ }
- bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
+ bool hasCSIRLevelProfile() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::CS);
+ }
- bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
+ bool instrEntryBBEnabled() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::BB);
+ }
+
+ bool hasSingleByteCoverage() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
+ }
+
+ bool functionEntryOnly() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
+ }
+
+ InstrProfKind getProfileKind() const override { return ProfileKind; }
/// Read the header.
Error readHeader() override;
@@ -276,6 +301,17 @@ public:
return (Version & VARIANT_MASK_DBG_CORRELATE) != 0;
}
+ bool hasSingleByteCoverage() const override {
+ return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
+ }
+
+ bool functionEntryOnly() const override {
+ return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
+ }
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the raw
+ /// instr profile.
+ InstrProfKind getProfileKind() const override;
+
InstrProfSymtab &getSymtab() override {
assert(Symtab.get());
return *Symtab.get();
@@ -333,7 +369,9 @@ private:
return Symtab->getFuncName(swap(NameRef));
}
- int getCounterTypeSize() const { return sizeof(uint64_t); }
+ int getCounterTypeSize() const {
+ return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
+ }
};
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
@@ -413,6 +451,9 @@ struct InstrProfReaderIndexBase {
virtual bool isIRLevelProfile() const = 0;
virtual bool hasCSIRLevelProfile() const = 0;
virtual bool instrEntryBBEnabled() const = 0;
+ virtual bool hasSingleByteCoverage() const = 0;
+ virtual bool functionEntryOnly() const = 0;
+ virtual InstrProfKind getProfileKind() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -465,6 +506,16 @@ public:
return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
}
+ bool hasSingleByteCoverage() const override {
+ return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
+ }
+
+ bool functionEntryOnly() const override {
+ return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
+ }
+
+ InstrProfKind getProfileKind() const override;
+
Error populateSymtab(InstrProfSymtab &Symtab) override {
return Symtab.create(HashTable->keys());
}
@@ -473,7 +524,7 @@ public:
/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
class InstrProfReaderRemapper {
public:
- virtual ~InstrProfReaderRemapper() {}
+ virtual ~InstrProfReaderRemapper() = default;
virtual Error populateRemappings() { return Error::success(); }
virtual Error getRecords(StringRef FuncName,
ArrayRef<NamedInstrProfRecord> &Data) = 0;
@@ -523,6 +574,18 @@ public:
return Index->instrEntryBBEnabled();
}
+ bool hasSingleByteCoverage() const override {
+ return Index->hasSingleByteCoverage();
+ }
+
+ bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the
+ /// indexed instr profile.
+ InstrProfKind getProfileKind() const override {
+ return Index->getProfileKind();
+ }
+
/// Return true if the given buffer is in an indexed instrprof format.
static bool hasFormat(const MemoryBuffer &DataBuffer);
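
A reader-side sketch of the new queries, with a hypothetical
`llvm::InstrProfReader &Reader` (not part of this patch):

  // Sketch: the helpers mirror individual InstrProfKind bits.
  if (Reader.hasSingleByteCoverage()) {
    // Counters are one-byte coverage probes rather than 64-bit counts.
  }
  llvm::InstrProfKind Kind = Reader.getProfileKind();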
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 97c80de6aa23..af1e46cf4fc2 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -33,19 +33,17 @@ class raw_fd_ostream;
class InstrProfWriter {
public:
using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>;
- // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation.
- enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS };
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
- ProfKind ProfileKind = PF_Unknown;
- bool InstrEntryBBEnabled;
+ // An enum describing the attributes of the profile.
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
// Use raw pointer here for the incomplete type object.
InstrProfRecordWriterTrait *InfoObj;
public:
- InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
+ InstrProfWriter(bool Sparse = false);
~InstrProfWriter();
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
@@ -79,30 +77,41 @@ public:
/// Write the profile, returning the raw data. For testing.
std::unique_ptr<MemoryBuffer> writeBuffer();
- /// Set the ProfileKind. Report error if mixing FE and IR level profiles.
- /// \c WithCS indicates if this is for contenxt sensitive instrumentation.
- Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) {
- if (ProfileKind == PF_Unknown) {
- if (IsIRLevel)
- ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel;
- else
- ProfileKind = PF_FE;
+ /// Update the attributes of the current profile from the attributes
+ /// specified. An error is returned if IR and FE profiles are mixed.
+ Error mergeProfileKind(const InstrProfKind Other) {
+ // If the kind is unset, this is the first profile we are merging, so just
+ // set it to the given type.
+ if (ProfileKind == InstrProfKind::Unknown) {
+ ProfileKind = Other;
return Error::success();
}
- if (((ProfileKind != PF_FE) && !IsIRLevel) ||
- ((ProfileKind == PF_FE) && IsIRLevel))
+ // Returns true if merging should fail, assuming A and B are incompatible.
+ auto testIncompatible = [&](InstrProfKind A, InstrProfKind B) {
+ return (static_cast<bool>(ProfileKind & A) &&
+ static_cast<bool>(Other & B)) ||
+ (static_cast<bool>(ProfileKind & B) &&
+ static_cast<bool>(Other & A));
+ };
+
+ // Check if the profiles are incompatible. Clang frontend profiles can't be
+ // merged with other profile types.
+ if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^
+ (Other & InstrProfKind::FE))) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
+ }
+ if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) {
+ return make_error<InstrProfError>(
+ instrprof_error::unsupported_version,
+ "cannot merge FunctionEntryOnly profiles and BB profiles together");
+ }
- // When merging a context-sensitive profile (WithCS == true) with an IRLevel
- // profile, set the kind to PF_IRLevelWithCS.
- if (ProfileKind == PF_IRLevel && WithCS)
- ProfileKind = PF_IRLevelWithCS;
-
+ // Now we update the profile type with the bits that are set.
+ ProfileKind |= Other;
return Error::success();
}
- void setInstrEntryBBEnabled(bool Enabled) { InstrEntryBBEnabled = Enabled; }
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);
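
A merge-loop sketch replacing the old setIsIRLevelProfile call; `Reader` is
hypothetical (not part of this patch):

  // Sketch: fold each input's kind into the writer; FE/IR mixes fail.
  llvm::InstrProfWriter Writer;
  if (llvm::Error E = Writer.mergeProfileKind(Reader.getProfileKind()))
    return E; // e.g. FunctionEntryOnly merged with BB is rejected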
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index f2cb3738f053..ff22a697965c 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -20,11 +20,10 @@
*
\*===----------------------------------------------------------------------===*/
-
#ifdef _MSC_VER
-#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop))
+#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
#else
-#define PACKED(__decl__) __decl__ __attribute__((__packed__))
+#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
#endif
// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
@@ -47,14 +46,106 @@ PACKED(struct Header {
uint64_t StackOffset;
});
+
// A struct describing the information necessary to describe a /proc/maps
// segment entry for a particular binary/library identified by its build id.
PACKED(struct SegmentEntry {
uint64_t Start;
uint64_t End;
uint64_t Offset;
- uint8_t BuildId[32];
+ // This field is unused until sanitizer procmaps support for build IDs on
+ // Linux ELF is implemented.
+ uint8_t BuildId[32] = {0};
+
+ SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
+ Start(S), End(E), Offset(O) {}
+
+ SegmentEntry(const SegmentEntry& S) {
+ Start = S.Start;
+ End = S.End;
+ Offset = S.Offset;
+ }
+
+ SegmentEntry& operator=(const SegmentEntry& S) {
+ Start = S.Start;
+ End = S.End;
+ Offset = S.Offset;
+ return *this;
+ }
+
+ bool operator==(const SegmentEntry& S) const {
+ return Start == S.Start &&
+ End == S.End &&
+ Offset == S.Offset;
+ }
});
+
+// A struct representing the heap allocation characteristics of a particular
+// runtime context. This struct is shared between the compiler-rt runtime and
+// the raw profile reader. The indexed format uses a separate, self-describing
+// backwards compatible format.
+PACKED(struct MemInfoBlock {
+ uint32_t alloc_count;
+ uint64_t total_access_count, min_access_count, max_access_count;
+ uint64_t total_size;
+ uint32_t min_size, max_size;
+ uint32_t alloc_timestamp, dealloc_timestamp;
+ uint64_t total_lifetime;
+ uint32_t min_lifetime, max_lifetime;
+ uint32_t alloc_cpu_id, dealloc_cpu_id;
+ uint32_t num_migrated_cpu;
+
+ // Currently only compared against the previously deallocated object.
+ uint32_t num_lifetime_overlaps;
+ uint32_t num_same_alloc_cpu;
+ uint32_t num_same_dealloc_cpu;
+
+ uint64_t data_type_id; // TODO: hash of type name
+
+ MemInfoBlock() : alloc_count(0) {}
+
+ MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp,
+ uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu)
+ : alloc_count(1), total_access_count(access_count),
+ min_access_count(access_count), max_access_count(access_count),
+ total_size(size), min_size(size), max_size(size),
+ alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
+ total_lifetime(dealloc_timestamp - alloc_timestamp),
+ min_lifetime(total_lifetime), max_lifetime(total_lifetime),
+ alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
+ num_lifetime_overlaps(0), num_same_alloc_cpu(0),
+ num_same_dealloc_cpu(0) {
+ num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
+ }
+
+ void Merge(const MemInfoBlock &newMIB) {
+ alloc_count += newMIB.alloc_count;
+
+ total_access_count += newMIB.total_access_count;
+ min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count;
+ max_access_count = newMIB.max_access_count > max_access_count ? newMIB.max_access_count : max_access_count;
+
+ total_size += newMIB.total_size;
+ min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size;
+ max_size = newMIB.max_size > max_size ? newMIB.max_size : max_size;
+
+ total_lifetime += newMIB.total_lifetime;
+ min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime;
+ max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime;
+
+ // We know newMIB was deallocated later, so we just need to check whether
+ // it was allocated before the last one was deallocated.
+ num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
+ alloc_timestamp = newMIB.alloc_timestamp;
+ dealloc_timestamp = newMIB.dealloc_timestamp;
+
+ num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
+ num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
+ alloc_cpu_id = newMIB.alloc_cpu_id;
+ dealloc_cpu_id = newMIB.dealloc_cpu_id;
+ }
+});
+
} // namespace memprof
} // namespace llvm
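
A small sketch of accumulating two blocks for the same allocation context
(values are illustrative; not part of this patch):

  // Sketch: fold a later allocation's stats into an accumulated block.
  llvm::memprof::MemInfoBlock Acc(/*size=*/32, /*access_count=*/4,
                                  /*alloc_timestamp=*/100,
                                  /*dealloc_timestamp=*/200,
                                  /*alloc_cpu=*/0, /*dealloc_cpu=*/0);
  llvm::memprof::MemInfoBlock Next(16, 2, 250, 300, 1, 1);
  Acc.Merge(Next); // alloc_count == 2; min/max stats updated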
diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
index 645a8b3c0b17..19080c0132e3 100644
--- a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
@@ -17,11 +17,12 @@
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Remarks/RemarkSerializer.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace remarks {
+struct Remarks;
+
/// Serialize the remarks to LLVM bitstream.
/// This class provides ways to emit remarks in the LLVM bitstream format and
/// its associated metadata.
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 49fd880be8ba..79d74e39deee 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -13,7 +13,6 @@
#ifndef LLVM_REMARKS_REMARKLINKER_H
#define LLVM_REMARKS_REMARKLINKER_H
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
@@ -22,6 +21,11 @@
#include <set>
namespace llvm {
+
+namespace object {
+class ObjectFile;
+}
+
namespace remarks {
struct RemarkLinker {
diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h
index b838f75e530f..61dfdbf3c17c 100644
--- a/llvm/include/llvm/Remarks/RemarkParser.h
+++ b/llvm/include/llvm/Remarks/RemarkParser.h
@@ -13,9 +13,7 @@
#ifndef LLVM_REMARKS_REMARKPARSER_H
#define LLVM_REMARKS_REMARKPARSER_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/Error.h"
#include <memory>
@@ -23,11 +21,13 @@
namespace llvm {
namespace remarks {
+struct Remark;
+
class EndOfFileError : public ErrorInfo<EndOfFileError> {
public:
static char ID;
- EndOfFileError() {}
+ EndOfFileError() = default;
void log(raw_ostream &OS) const override { OS << "End of file reached."; }
std::error_code convertToErrorCode() const override {
diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h
index 90e556df87e7..6217bd98d1a5 100644
--- a/llvm/include/llvm/Remarks/RemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/RemarkSerializer.h
@@ -16,11 +16,15 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
+class raw_ostream;
+
namespace remarks {
+struct Remark;
+
enum class SerializerMode {
Separate, // A mode where the metadata is serialized separately from the
// remarks. Typically, this is used when the remarks need to be
diff --git a/llvm/include/llvm/Remarks/RemarkStreamer.h b/llvm/include/llvm/Remarks/RemarkStreamer.h
index 7741cb45b72c..b25cb0c331a4 100644
--- a/llvm/include/llvm/Remarks/RemarkStreamer.h
+++ b/llvm/include/llvm/Remarks/RemarkStreamer.h
@@ -34,10 +34,12 @@
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include <memory>
namespace llvm {
+
+class raw_ostream;
+
namespace remarks {
class RemarkStreamer final {
/// The regex used to filter remarks based on the passes that emit them.
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index 26f4bae53119..a953e9439db4 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -204,6 +204,9 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS | AArch64::AEK_PAUTH))
AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index 784a980fee24..e0838a1f425e 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -44,6 +44,11 @@ constexpr uint32_t VersionMajorV4 = 1;
/// HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV4 = 1;
+/// HSA metadata major version for code object V5.
+constexpr uint32_t VersionMajorV5 = 1;
+/// HSA metadata minor version for code object V5.
+constexpr uint32_t VersionMinorV5 = 2;
+
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 433d7fdc2c3b..80deeb2a6e9d 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -328,6 +328,8 @@ ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
ARM::AEK_I8MM))
ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h
index c664ac48daad..6853df3ccab1 100644
--- a/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -35,16 +35,11 @@ public:
llvm::support::endianness Endian);
explicit BinaryStreamReader(StringRef Data, llvm::support::endianness Endian);
- BinaryStreamReader(const BinaryStreamReader &Other)
- : Stream(Other.Stream), Offset(Other.Offset) {}
+ BinaryStreamReader(const BinaryStreamReader &Other) = default;
- BinaryStreamReader &operator=(const BinaryStreamReader &Other) {
- Stream = Other.Stream;
- Offset = Other.Offset;
- return *this;
- }
+ BinaryStreamReader &operator=(const BinaryStreamReader &Other) = default;
- virtual ~BinaryStreamReader() {}
+ virtual ~BinaryStreamReader() = default;
/// Read as much as possible from the underlying string at the current offset
/// without invoking a copy, and set \p Buffer to the resulting data slice.
diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h
index c05b0420aaa3..ce7af3650f52 100644
--- a/llvm/include/llvm/Support/BinaryStreamWriter.h
+++ b/llvm/include/llvm/Support/BinaryStreamWriter.h
@@ -35,16 +35,11 @@ public:
explicit BinaryStreamWriter(MutableArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
- BinaryStreamWriter(const BinaryStreamWriter &Other)
- : Stream(Other.Stream), Offset(Other.Offset) {}
+ BinaryStreamWriter(const BinaryStreamWriter &Other) = default;
- BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) {
- Stream = Other.Stream;
- Offset = Other.Offset;
- return *this;
- }
+ BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) = default;
- virtual ~BinaryStreamWriter() {}
+ virtual ~BinaryStreamWriter() = default;
/// Write the bytes specified in \p Buffer to the underlying stream.
/// On success, updates the offset so that subsequent writes will occur
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 120ab1840915..c8e29ac42559 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -877,7 +877,7 @@ class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
basic_parser_impl(Option &) {}
- virtual ~basic_parser_impl() {}
+ virtual ~basic_parser_impl() = default;
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueRequired;
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index f4c277fae7cc..f3317049524f 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -77,12 +77,21 @@
/// * 1916: VS2017, version 15.9
/// * 1920: VS2019, version 16.0
/// * 1921: VS2019, version 16.1
+/// * 1922: VS2019, version 16.2
+/// * 1923: VS2019, version 16.3
+/// * 1924: VS2019, version 16.4
+/// * 1925: VS2019, version 16.5
+/// * 1926: VS2019, version 16.6
+/// * 1927: VS2019, version 16.7
+/// * 1928: VS2019, version 16.8 + 16.9
+/// * 1929: VS2019, version 16.10 + 16.11
+/// * 1930: VS2022, version 17.0
#ifdef _MSC_VER
#define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version))
-// We require at least MSVC 2017.
-#if !LLVM_MSC_PREREQ(1910)
-#error LLVM requires at least MSVC 2017.
+// We require at least VS 2019.
+#if !LLVM_MSC_PREREQ(1920)
+#error LLVM requires at least VS 2019.
#endif
#else
@@ -94,12 +103,8 @@
/// Sadly, this is separate from just rvalue reference support because GCC
/// and MSVC implemented this later than everything else. This appears to be
/// corrected in MSVC 2019 but not MSVC 2017.
-#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \
- LLVM_MSC_PREREQ(1920)
+/// FIXME: Remove LLVM_HAS_RVALUE_REFERENCE_THIS macro
#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
-#else
-#define LLVM_HAS_RVALUE_REFERENCE_THIS 0
-#endif
/// Expands to '&' if ref-qualifiers for *this are supported.
///
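
For context, LLVM_MSC_PREREQ simply compares _MSC_VER against the table above, so downstream code can gate on the new baseline. A hedged sketch (the guard itself is illustrative, not part of the patch):

  // Reject compilers older than the VS 2019 baseline established above.
  #if defined(_MSC_VER) && !LLVM_MSC_PREREQ(1920)
  #error This code assumes VS 2019 (MSVC 19.20) or newer.
  #endif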
diff --git a/llvm/include/llvm/Support/FileOutputBuffer.h b/llvm/include/llvm/Support/FileOutputBuffer.h
index 17b44380e9cd..d4b73522115d 100644
--- a/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -70,7 +70,7 @@ public:
/// If this object was previously committed, the destructor just deletes
/// this object. If this object was not committed, the destructor
/// deallocates the buffer and the target file is never written.
- virtual ~FileOutputBuffer() {}
+ virtual ~FileOutputBuffer() = default;
/// This removes the temporary file (unless it already was committed)
/// but keeps the memory mapping alive.
diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h
index 2cafc120c1d7..2204cff13a64 100644
--- a/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -24,7 +24,7 @@ class format_adapter {
virtual void anchor();
protected:
- virtual ~format_adapter() {}
+ virtual ~format_adapter() = default;
public:
virtual void format(raw_ostream &S, StringRef Options) = 0;
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index f39400c26eab..d7c64bf62c7a 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -260,7 +260,7 @@ protected:
friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase>;
public:
- DominatorTreeBase() {}
+ DominatorTreeBase() = default;
DominatorTreeBase(DominatorTreeBase &&Arg)
: Roots(std::move(Arg.Roots)),
diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
index 3bafeb48f64a..96105d6b4684 100644
--- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -37,7 +37,7 @@ namespace IDFCalculatorDetail {
/// May be specialized if, for example, one does not want to return null
/// pointer successors.
template <class NodeTy, bool IsPostDom> struct ChildrenGetterTy {
- using NodeRef = typename GraphTraits<NodeTy>::NodeRef;
+ using NodeRef = typename GraphTraits<NodeTy *>::NodeRef;
using ChildrenTy = SmallVector<NodeRef, 8>;
ChildrenTy get(const NodeRef &N);
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 5ef0ba31f785..96b7753e9b20 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -31,7 +31,7 @@ private:
public:
// Default construct Zero and One.
- KnownBits() {}
+ KnownBits() = default;
/// Create a known bits object of BitWidth bits initialized to unknown.
KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {}
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index b450c1df3558..7fa0e6ee3acf 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -92,6 +92,9 @@ private:
void updateFLen();
void updateMinVLen();
void updateMaxELen();
+
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index 9bde4f455a2d..6b5daf710c9f 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -115,7 +115,7 @@ public:
return SP->getKind() == ScopedPrinterKind::Base;
}
- virtual ~ScopedPrinter() {}
+ virtual ~ScopedPrinter() = default;
void flush() { OS.flush(); }
@@ -792,13 +792,13 @@ private:
struct DelimitedScope {
DelimitedScope(ScopedPrinter &W) : W(&W) {}
DelimitedScope() : W(nullptr) {}
- virtual ~DelimitedScope(){};
+ virtual ~DelimitedScope() = default;
virtual void setPrinter(ScopedPrinter &W) = 0;
ScopedPrinter *W;
};
struct DictScope : DelimitedScope {
- explicit DictScope() {}
+ explicit DictScope() = default;
explicit DictScope(ScopedPrinter &W) : DelimitedScope(W) { W.objectBegin(); }
DictScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) {
@@ -817,7 +817,7 @@ struct DictScope : DelimitedScope {
};
struct ListScope : DelimitedScope {
- explicit ListScope() {}
+ explicit ListScope() = default;
explicit ListScope(ScopedPrinter &W) : DelimitedScope(W) { W.arrayBegin(); }
ListScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) {
diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 352fba511937..162a1de72f1a 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -109,7 +109,7 @@ struct SuffixTreeNode {
SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
: StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
- SuffixTreeNode() {}
+ SuffixTreeNode() = default;
};
/// A data structure for fast substring queries.
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index eb49e805b40d..742d20ce51dd 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -106,7 +106,7 @@ public:
~Timer();
/// Create an uninitialized timer; clients must use 'init'.
- explicit Timer() {}
+ explicit Timer() = default;
void init(StringRef TimerName, StringRef TimerDescription);
void init(StringRef TimerName, StringRef TimerDescription, TimerGroup &tg);
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index add05bd078d6..1157487eced3 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -2015,7 +2015,7 @@ class Resolver {
public:
explicit Resolver(Record *CurRec) : CurRec(CurRec) {}
- virtual ~Resolver() {}
+ virtual ~Resolver() = default;
Record *getCurrentRecord() const { return CurRec; }
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 03ead4bc0714..072ccf7320e8 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_AGGRESSIVEINSTCOMBINE_AGGRESSIVEINSTCOMBINE_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
index 6a208dfa6a25..78b2f909f1c9 100644
--- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 6d6cb58abdbb..225def99678a 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -27,14 +27,6 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
public:
ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
- /// Check if callers and the callee \p F agree how promoted arguments would be
- /// passed. The ones that they do not agree on are eliminated from the sets but
- /// the return value has to be observed as well.
- static bool areFunctionArgsABICompatible(
- const Function &F, const TargetTransformInfo &TTI,
- SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
-
/// Checks if a type could have padding bytes.
static bool isDenselyPacked(Type *type, const DataLayout &DL);
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index d56a43ec7961..7eee16f71d64 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -132,6 +132,7 @@ struct AbstractAttribute;
struct InformationCache;
struct AAIsDead;
struct AttributorCallGraph;
+struct IRPosition;
class AAResults;
class Function;
@@ -139,6 +140,11 @@ class Function;
/// Abstract Attribute helper functions.
namespace AA {
+/// Return true if \p I is a `nosync` instruction. Use generic reasoning and
+/// potentially the corresponding AANoSync.
+bool isNoSyncInst(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA);
+
/// Return true if \p V is dynamically unique, that is, there are no two
/// "instances" of \p V at runtime with different values.
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
@@ -185,7 +191,8 @@ Constant *getInitialValueForObj(Value &Obj, Type &Ty,
bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
- const Instruction *CtxI);
+ const Instruction *CtxI,
+ bool Intraprocedural = false);
/// Collect all potential values of the one stored by \p SI into
/// \p PotentialCopies. That is, the only copies that were made via the
@@ -200,6 +207,34 @@ bool getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
+/// Return true if \p IRP is readonly. This will query respective AAs that
+/// deduce the information and introduce dependences for \p QueryingAA.
+bool isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown);
+
+/// Return true if \p IRP is readnone. This will query respective AAs that
+/// deduce the information and introduce dependences for \p QueryingAA.
+bool isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown);
+
+/// Return true if \p ToI is potentially reachable from \p FromI. The two
+/// instructions do not need to be in the same function. \p GoBackwardsCB
+/// can be provided to convey domain knowledge about the "lifespan" the user is
+/// interested in. By default, the callers of \p FromI are checked as well to
+/// determine if \p ToI can be reached. If the query is not interested in
+/// callers beyond a certain point, e.g., a GPU kernel entry or the function
+/// containing an alloca, the \p GoBackwardsCB should return false.
+bool isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Instruction &ToI,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB = nullptr);
+
+/// Same as above but it is sufficient to reach any instruction in \p ToFn.
+bool isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB);
+
} // namespace AA
/// The value passed to the line option that defines the maximal initialization
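
A usage sketch for the AA::isPotentiallyReachable query declared above; A, FromI, ToI, and QueryingAA are assumed to be in scope, and the "kernel" attribute check is purely illustrative:

  // Stop the backwards walk at hypothetical kernel entry points, since
  // callers beyond them are outside the lifespan we care about.
  bool MayReach = AA::isPotentiallyReachable(
      A, FromI, ToI, QueryingAA,
      /*GoBackwardsCB=*/[](const Function &F) {
        return !F.hasFnAttribute("kernel");
      });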
@@ -227,7 +262,7 @@ enum class DepClassTy {
/// The data structure for the nodes of a dependency graph
struct AADepGraphNode {
public:
- virtual ~AADepGraphNode(){};
+ virtual ~AADepGraphNode() = default;
using DepTy = PointerIntPair<AADepGraphNode *, 1>;
protected:
@@ -266,8 +301,8 @@ public:
/// then it means that B depends on A, and when the state of A is
/// updated, node B should also be updated
struct AADepGraph {
- AADepGraph() {}
- ~AADepGraph() {}
+ AADepGraph() = default;
+ ~AADepGraph() = default;
using DepTy = AADepGraphNode::DepTy;
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
@@ -334,6 +369,14 @@ struct IRPosition {
return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
}
+ /// Create a position describing the instruction \p I. This is different from
+  /// the value version because call sites are treated as instructions rather
+ /// than their return value in this function.
+ static const IRPosition inst(const Instruction &I,
+ const CallBaseContext *CBContext = nullptr) {
+ return IRPosition(const_cast<Instruction &>(I), IRP_FLOAT, CBContext);
+ }
+
/// Create a position describing the function scope of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition function(const Function &F,
@@ -662,7 +705,7 @@ private:
break;
case IRPosition::IRP_FLOAT:
// Special case for floating functions.
- if (isa<Function>(AnchorVal))
+ if (isa<Function>(AnchorVal) || isa<CallBase>(AnchorVal))
Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
else
Enc = {&AnchorVal, ENC_VALUE};
@@ -844,7 +887,7 @@ struct AnalysisGetter {
}
AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
- AnalysisGetter() {}
+ AnalysisGetter() = default;
private:
FunctionAnalysisManager *FAM = nullptr;
@@ -879,7 +922,7 @@ struct InformationCache {
[&](const Function &F) {
return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
}),
- AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
+ AG(AG), TargetTriple(M.getTargetTriple()) {
if (CGSCC)
initializeModuleSlice(*CGSCC);
}
@@ -996,13 +1039,6 @@ struct InformationCache {
return AG.getAnalysis<AP>(F);
}
- /// Return SCC size on call graph for function \p F or 0 if unknown.
- unsigned getSccSize(const Function &F) {
- if (CGSCC && CGSCC->count(const_cast<Function *>(&F)))
- return CGSCC->size();
- return 0;
- }
-
/// Return datalayout used in the module.
const DataLayout &getDL() { return DL; }
@@ -1092,9 +1128,6 @@ private:
/// Getters for analysis.
AnalysisGetter &AG;
- /// The underlying CGSCC, or null if not available.
- SetVector<Function *> *CGSCC;
-
/// Set of inlineable functions
SmallPtrSet<const Function *, 8> InlineableFunctions;
@@ -1362,6 +1395,9 @@ struct Attributor {
return AA;
}
+ /// Allows a query AA to request an update if a new query was received.
+ void registerForUpdate(AbstractAttribute &AA);
+
/// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
/// \p FromAA changes \p ToAA should be updated as well.
///
@@ -1794,6 +1830,18 @@ public:
const AbstractAttribute &QueryingAA,
bool RequireAllCallSites, bool &AllCallSitesKnown);
+ /// Check \p Pred on all call sites of \p Fn.
+ ///
+ /// This method will evaluate \p Pred on call sites and return
+  /// true if \p Pred holds at every call site. However, this is only possible
+  /// if all call sites are known, which is the case if the function has
+  /// internal linkage.
+ /// If true is returned, \p AllCallSitesKnown is set if all possible call
+ /// sites of the function have been visited.
+ bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
+ const Function &Fn, bool RequireAllCallSites,
+ const AbstractAttribute *QueryingAA,
+ bool &AllCallSitesKnown);
+
/// Check \p Pred on all values potentially returned by \p F.
///
/// This method will evaluate \p Pred on all values potentially returned by
@@ -1932,18 +1980,6 @@ private:
/// may trigger further updates. (\see DependenceStack)
void rememberDependences();
- /// Check \p Pred on all call sites of \p Fn.
- ///
- /// This method will evaluate \p Pred on call sites and return
- /// true if \p Pred holds in every call sites. However, this is only possible
- /// all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
- bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
- const Function &Fn, bool RequireAllCallSites,
- const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown);
-
/// Determine if CallBase context in \p IRP should be propagated.
bool shouldPropagateCallBaseContext(const IRPosition &IRP);
@@ -2056,6 +2092,10 @@ private:
/// Callback to get an OptimizationRemarkEmitter from a Function *.
Optional<OptimizationRemarkGetter> OREGetter;
+ /// Container with all the query AAs that requested an update via
+ /// registerForUpdate.
+ SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate;
+
/// The name of the pass to emit remarks for.
const char *PassName = "";
@@ -2081,7 +2121,7 @@ private:
/// additional methods to directly modify the state based if needed. See the
/// class comments for help.
struct AbstractState {
- virtual ~AbstractState() {}
+ virtual ~AbstractState() = default;
/// Return if this abstract state is in a valid state. If false, no
/// information provided should be used.
@@ -2122,7 +2162,7 @@ template <typename base_ty, base_ty BestState, base_ty WorstState>
struct IntegerStateBase : public AbstractState {
using base_t = base_ty;
- IntegerStateBase() {}
+ IntegerStateBase() = default;
IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}
/// Return the best possible representable state.
@@ -2365,7 +2405,7 @@ struct BooleanState : public IntegerStateBase<bool, true, false> {
using super = IntegerStateBase<bool, true, false>;
using base_t = IntegerStateBase::base_t;
- BooleanState() {}
+ BooleanState() = default;
BooleanState(base_t Assumed) : super(Assumed) {}
/// Set the assumed value to \p Value but never below the known one.
@@ -2773,7 +2813,7 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
/// Virtual destructor.
- virtual ~AbstractAttribute() {}
+ virtual ~AbstractAttribute() = default;
/// This function is used to identify if an \p DGN is of type
/// AbstractAttribute so that the dyn_cast and cast can use such information
@@ -2793,6 +2833,14 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
/// in the `updateImpl` method.
virtual void initialize(Attributor &A) {}
+  /// A query AA is always scheduled as long as we do updates because it does
+  /// lazy computation that cannot be determined to be done from the outside.
+  /// However, while query AAs will not be fixed even if they do not have
+  /// outstanding dependences, we will only schedule them like other AAs. If a
+  /// query AA receives a new query, it needs to request an update via
+  /// `Attributor::registerForUpdate`.
+ virtual bool isQueryAA() const { return false; }
+
/// Return the internal abstract state for inspection.
virtual StateType &getState() = 0;
virtual const StateType &getState() const = 0;
@@ -2989,6 +3037,14 @@ struct AANoSync
/// Returns true if "nosync" is known.
bool isKnownNoSync() const { return getKnown(); }
+  /// Helper function used to determine whether an instruction is non-relaxed
+  /// atomic. In other words, it returns true if an atomic instruction does
+  /// not have unordered or monotonic ordering.
+ static bool isNonRelaxedAtomic(const Instruction *I);
+
+  /// Helper function specific to intrinsics that are potentially volatile.
+ static bool isNoSyncIntrinsic(const Instruction *I);
+
/// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -4419,7 +4475,7 @@ private:
struct AACallGraphNode {
AACallGraphNode(Attributor &A) : A(A) {}
- virtual ~AACallGraphNode() {}
+ virtual ~AACallGraphNode() = default;
virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;
@@ -4485,7 +4541,7 @@ struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
// Synthetic root node for the Attributor's internal call graph.
struct AttributorCallGraph : public AACallGraphNode {
AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
- virtual ~AttributorCallGraph() {}
+ virtual ~AttributorCallGraph() = default;
AACallEdgeIterator optimisticEdgesBegin() const override {
return AACallEdgeIterator(A, A.Functions.begin());
@@ -4592,18 +4648,30 @@ struct AAFunctionReachability
AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// See AbstractAttribute::isQueryAA.
+ bool isQueryAA() const override { return true; }
+
/// Return true if the function represented by this position can reach \p Fn.
- virtual bool canReach(Attributor &A, Function *Fn) const = 0;
+ virtual bool canReach(Attributor &A, const Function &Fn) const = 0;
+
+ /// Can \p CB reach \p Fn.
+ virtual bool canReach(Attributor &A, CallBase &CB,
+ const Function &Fn) const = 0;
- /// Can \p CB reach \p Fn
- virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0;
+ /// Can \p Inst reach \p Fn.
+ /// See also AA::isPotentiallyReachable.
+ virtual bool instructionCanReach(Attributor &A, const Instruction &Inst,
+ const Function &Fn,
+ bool UseBackwards = true) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
- const std::string getName() const override { return "AAFuncitonReacability"; }
+ const std::string getName() const override {
+ return "AAFunctionReachability";
+ }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
@@ -4639,21 +4707,12 @@ struct AAPointerInfo : public AbstractAttribute {
AccessKind Kind, Type *Ty)
: LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
Ty(Ty) {}
- Access(const Access &Other)
- : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
- Kind(Other.Kind), Ty(Other.Ty) {}
+ Access(const Access &Other) = default;
Access(const Access &&Other)
: LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
Kind(Other.Kind), Ty(Other.Ty) {}
- Access &operator=(const Access &Other) {
- LocalI = Other.LocalI;
- RemoteI = Other.RemoteI;
- Content = Other.Content;
- Kind = Other.Kind;
- Ty = Other.Ty;
- return *this;
- }
+ Access &operator=(const Access &Other) = default;
bool operator==(const Access &R) const {
return LocalI == R.LocalI && RemoteI == R.RemoteI &&
Content == R.Content && Kind == R.Kind;
@@ -4741,6 +4800,15 @@ struct AAPointerInfo : public AbstractAttribute {
virtual bool forallInterferingAccesses(
StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
+ /// Call \p CB on all write accesses that might interfere with \p LI and
+ /// return true if all such accesses were known and the callback returned true
+  /// for all of them, false otherwise. In contrast to forallInterferingAccesses,
+  /// this function will perform reasoning to exclude write accesses that cannot
+  /// affect the load even if, on the surface, they look as if they would.
+ virtual bool forallInterferingWrites(
+ Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ function_ref<bool(const Access &, bool)> CB) const = 0;
+
/// This function should return true if the type of the \p AA is AAPointerInfo
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
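
A sketch of how a caller might drive the new forallInterferingWrites interface; PI, A, QueryingAA, and LI are assumed to be in scope, and the benign-write predicate is purely illustrative:

  // Visit every write that might interfere with the load. Returning false
  // from the callback aborts the walk, and the overall result is true only
  // if all interfering writes were known and accepted.
  bool AllWritesBenign = PI.forallInterferingWrites(
      A, QueryingAA, LI,
      [&](const AAPointerInfo::Access &Acc, bool IsExact) {
        return !Acc.isWrite() || IsExact;
      });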
diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index fd99843d0449..a2b93f8aa30d 100644
--- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -14,6 +14,7 @@
#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index ed74c8ed0e96..e4807a1c9c65 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -337,11 +337,9 @@ private:
/// be analyzed for similarity. This is needed as there may be instructions we
/// can identify as having similarity, but which are more complicated to outline.
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
- InstructionAllowed() {}
+ InstructionAllowed() = default;
- bool visitBranchInst(BranchInst &BI) {
- return EnableBranches;
- }
+ bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
bool visitPHINode(PHINode &PN) { return EnableBranches; }
// TODO: Handle allocas.
bool visitAllocaInst(AllocaInst &AI) { return false; }
@@ -359,7 +357,7 @@ private:
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
// TODO: Handle specific intrinsics individually from those that can be
// handled.
- bool IntrinsicInst(IntrinsicInst &II) { return false; }
+ bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
// We only handle CallInsts that are not indirect, since we cannot guarantee
// that they have a name in these cases.
bool visitCallInst(CallInst &CI) {
@@ -395,6 +393,10 @@ private:
// The flag variable that marks whether we should allow indirect calls
// to be outlined.
bool EnableIndirectCalls = true;
+
+  // The flag variable that marks whether we should allow intrinsic
+  // instructions to be outlined.
+ bool EnableIntrinsics = false;
};
/// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index bb7907fb8ac8..302695d96355 100644
--- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -17,6 +17,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index 2b05aaf320cf..704b793ab3ea 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 43f4bc78140f..e73c36043cb2 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -154,7 +154,7 @@ class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
public:
- PseudoProbeUpdatePass() {}
+ PseudoProbeUpdatePass() = default;
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index f8cb6dc73a6f..ae19fbfb49a7 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -93,7 +93,7 @@ public:
MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
- virtual ~InstCombiner() {}
+ virtual ~InstCombiner() = default;
/// Return the source operand of a potentially bitcasted value while
/// optionally checking if it has one use. If there is no bitcast or the one
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index 6002f0270083..a0d8118c23f7 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -16,6 +16,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 5a0fb835606a..0a5456c5956f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -66,7 +66,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
}
SmallVector<Instruction *, 8> ReachableRetVec;
unsigned NumCoveredExits = 0;
- for (auto &RI : RetVec) {
+ for (auto *RI : RetVec) {
if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
continue;
ReachableRetVec.push_back(RI);
@@ -83,7 +83,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
for (auto *End : Ends)
Callback(End);
} else {
- for (auto &RI : ReachableRetVec)
+ for (auto *RI : ReachableRetVec)
Callback(RI);
// We may have inserted untag outside of the lifetime interval.
// Signal the caller to remove the lifetime end call for this alloca.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 8d70f1429b99..76d586252743 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_BOUNDSCHECKING_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index 3118a3762935..70949026a892 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 64523d7d073c..5873db22a5d1 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -87,21 +87,32 @@ private:
/// Count the number of instrumented value sites for the function.
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
- /// Replace instrprof_value_profile with a call to runtime library.
+  /// Replace instrprof.value.profile with a call to the runtime library.
void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
- /// Replace instrprof_increment with an increment of the appropriate value.
+ /// Replace instrprof.cover with a store instruction to the coverage byte.
+ void lowerCover(InstrProfCoverInst *Inc);
+
+ /// Replace instrprof.increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
/// Force emitting of name vars for unused functions.
void lowerCoverageData(GlobalVariable *CoverageNamesVar);
+ /// Compute the address of the counter value that this profiling instruction
+ /// acts on.
+ Value *getCounterAddress(InstrProfInstBase *I);
+
/// Get the region counters for an increment, creating them if necessary.
///
/// If the counter array doesn't yet exist, the profile data variables
/// referring to them will also be created.
GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc);
+ /// Create the region counters.
+ GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage);
+
/// Emit the section with compressed function names.
void emitNameData();
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index f4d1b1d90e6f..b9ad56ba7509 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 7ba9d65cae55..e83cc2b9bef0 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -73,7 +73,7 @@ class PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>> {
public:
- explicit PassManager() {}
+ explicit PassManager() = default;
// FIXME: These are equivalent to the default move constructor/move
// assignment. However, using = default triggers linker errors due to the
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
index a5ad4a2192a0..61c7bf0454e1 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
@@ -23,7 +23,7 @@ namespace llvm {
struct LowerConstantIntrinsicsPass :
PassInfoMixin<LowerConstantIntrinsicsPass> {
public:
- explicit LowerConstantIntrinsicsPass() {}
+ explicit LowerConstantIntrinsicsPass() = default;
/// Run the pass over the function.
///
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 81363130e2e3..f4472e699295 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 04a5f7e6ff38..64691d68b1c4 100644
--- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -14,6 +14,7 @@
#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class Function;
@@ -22,7 +23,7 @@ class Function;
class WarnMissedTransformationsPass
: public PassInfoMixin<WarnMissedTransformationsPass> {
public:
- explicit WarnMissedTransformationsPass() {}
+ explicit WarnMissedTransformationsPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
index a497722eece6..d679bca69510 100644
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -20,6 +20,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class AssumptionCache;
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 8970afb3aeaa..d99b2a56559d 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -46,9 +46,9 @@ class Value;
/// instruction. If \p Updates is specified, collect all necessary DT updates
/// into this vector. If \p KeepOneInputPHIs is true, one-input Phis in
/// successors of blocks being deleted will be preserved.
-void DetatchDeadBlocks(ArrayRef <BasicBlock *> BBs,
- SmallVectorImpl<DominatorTree::UpdateType> *Updates,
- bool KeepOneInputPHIs = false);
+void detachDeadBlocks(ArrayRef <BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs = false);
/// Delete the specified block, which must have no predecessors.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
index f8211d60938e..e12d7e09aad6 100644
--- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
@@ -53,7 +53,7 @@ class CallGraphUpdater {
///}
public:
- CallGraphUpdater() {}
+ CallGraphUpdater() = default;
~CallGraphUpdater() { finalize(); }
/// Initializers for usage outside of a CGSCC pass, inside a CGSCC pass in
diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h
index 0f1c7ec724df..892e354cd9ed 100644
--- a/llvm/include/llvm/Transforms/Utils/Debugify.h
+++ b/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -21,6 +21,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
using DebugFnMap = llvm::MapVector<llvm::StringRef, const llvm::DISubprogram *>;
using DebugInstMap = llvm::MapVector<const llvm::Instruction *, bool>;
diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
index 84e4fee51c26..af9cdb9fd619 100644
--- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
+++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -15,6 +15,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
namespace llvm {
class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> {
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 7b6595c192de..07dabaeaa907 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -21,7 +21,7 @@ namespace llvm {
bool canPeel(Loop *L);
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
+ DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA);
TargetTransformInfo::PeelingPreferences
gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 9bbe8ea7e1e8..8d459972336b 100644
--- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
#include <utility> // for std::pair
namespace llvm {
@@ -106,6 +107,10 @@ void filterDeadComdatFunctions(
/// unique identifier for this module, so we return the empty string.
std::string getUniqueModuleId(Module *M);
+/// Embed the memory buffer \p Buf into the module \p M as a global using the
+/// specified section name.
+void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName);
+
class CallInst;
namespace VFABI {
/// Overwrite the Vector Function ABI variants attribute with the names provide
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 5de575aed059..ad24cb454d5e 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -42,7 +42,7 @@ class SSAUpdaterBulk {
SmallVector<Use *, 4> Uses;
StringRef Name;
Type *Ty;
- RewriteInfo(){};
+ RewriteInfo() = default;
RewriteInfo(StringRef &N, Type *T) : Name(N), Ty(T){};
};
SmallVector<RewriteInfo, 4> Rewrites;
@@ -52,10 +52,10 @@ class SSAUpdaterBulk {
Value *computeValueAt(BasicBlock *BB, RewriteInfo &R, DominatorTree *DT);
public:
- explicit SSAUpdaterBulk(){};
+ explicit SSAUpdaterBulk() = default;
SSAUpdaterBulk(const SSAUpdaterBulk &) = delete;
SSAUpdaterBulk &operator=(const SSAUpdaterBulk &) = delete;
- ~SSAUpdaterBulk(){};
+ ~SSAUpdaterBulk() = default;
/// Add a new variable to the SSA rewriter. This needs to be called before
/// AddAvailableValue or AddUse calls. The return value is the variable ID,
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
index f72c76c6f0f2..3636285e38f5 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index 25c7aeee148e..d0693ccfd8f6 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -60,6 +60,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/DynamicTags.def"
textual header "BinaryFormat/MachO.def"
textual header "BinaryFormat/MinidumpConstants.def"
+ textual header "BinaryFormat/Swift.def"
textual header "BinaryFormat/ELFRelocs/AArch64.def"
textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
textual header "BinaryFormat/ELFRelocs/ARM.def"
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index b4c985962837..0a0b53796add 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1010,10 +1010,13 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
return ModRefInfo::NoModRef;
}
- // The semantics of memcpy intrinsics either exactly overlap or do not
- // overlap, i.e., source and destination of any given memcpy are either
- // no-alias or must-alias.
- if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
+  // Ideally, there should be no need to special-case memcpy/memmove
+  // intrinsics here, since the general machinery (based on memory attributes)
+  // should already handle them just fine. Unfortunately, it doesn't, due to a
+  // deficiency in operand bundle support. At the moment it's not clear whether
+  // the complexity of enhancing the general mechanism is worth it.
+  // TODO: Consider improving operand bundle support in the general mechanism.
+ if (auto *Inst = dyn_cast<AnyMemTransferInst>(Call)) {
AliasResult SrcAA =
getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI);
AliasResult DestAA =
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index d2f0c57f6dab..01681c47418a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -39,6 +39,10 @@ cl::opt<bool>
MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
cl::desc("only allow matching call instructions if the "
"name and type signature match."));
+
+cl::opt<bool>
+ DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
+ cl::desc("Don't match or outline intrinsics"));
} // namespace llvm
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
@@ -109,6 +113,24 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
assert(CI && "Instruction must be call");
CalleeName = "";
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+  // To hash intrinsics, we use the opcode and types like the other
+  // instructions, but also the intrinsic ID and the name of the
+  // intrinsic.
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ FunctionType *FT = II->getFunctionType();
+ // If there is an overloaded name, we have to use the complex version
+ // of getName to get the entire string.
+ if (Intrinsic::isOverloaded(IntrinsicID))
+ CalleeName =
+ Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
+ // If there is not an overloaded name, we only need to use this version.
+ else
+ CalleeName = Intrinsic::getName(IntrinsicID).str();
+
+ return;
+ }
+
if (!CI->isIndirectCall() && MatchByName)
CalleeName = CI->getCalledFunction()->getName().str();
}
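
Illustrative values (not from the patch) for the two Intrinsic::getName forms used above; overloaded intrinsics need the parameter types to recover the fully mangled name, non-overloaded ones do not:

  // Overloaded: the mangled suffix depends on the concrete types, e.g.
  //   Intrinsic::getName(Intrinsic::memcpy, FT->params(), M, FT)
  //     == "llvm.memcpy.p0i8.p0i8.i64"  (one common instantiation)
  // Not overloaded: a fixed name suffices, e.g.
  //   Intrinsic::getName(Intrinsic::trap) == "llvm.trap"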
@@ -232,7 +254,7 @@ bool IRSimilarity::isClose(const IRInstructionData &A,
// name is the same. We already know that the types are, since
// isSameOperationAs is true.
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
- if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0)
+ if (A.getCalleeName().str() != B.getCalleeName().str())
return false;
}
@@ -1139,6 +1161,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+ Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
@@ -1151,6 +1174,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+ Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -1172,7 +1196,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName));
+ MatchCallsByName, !DisableIntrinsics));
return false;
}
@@ -1189,9 +1213,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
ModuleAnalysisManager &) {
-
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName);
+ MatchCallsByName, !DisableIntrinsics);
IRSI.findSimilarity(M);
return IRSI;
}
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index b71b39334ace..4775340b3438 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -951,7 +951,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
// X / undef -> poison
// X % undef -> poison
- if (Q.isUndefValue(Op1))
+ if (Q.isUndefValue(Op1) || isa<PoisonValue>(Op1))
return PoisonValue::get(Ty);
// X / 0 -> poison
@@ -2418,6 +2418,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
return C;
+ // X ^ poison -> poison
+ if (isa<PoisonValue>(Op1))
+ return Op1;
+
// A ^ undef -> undef
if (Q.isUndefValue(Op1))
return Op1;
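
A hedged caller-side sketch of the new poison rule, assuming a Value *X and a SimplifyQuery Q are in scope:

  // With the hunk above, xor with a poison operand folds to poison
  // outright instead of relying on the undef rule below it.
  Value *Folded = SimplifyXorInst(X, PoisonValue::get(X->getType()), Q);
  // Folded is the poison operand itself (the second argument).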
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 0fbf1db0685d..cd0d4d6b9ca8 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -208,7 +208,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- MaybeAlign MA,
+ Align Alignment,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
@@ -223,8 +223,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
// determine the exact offset to the attributed variable, we can use that
// information here.
- // Require ABI alignment for loads without alignment specification
- const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()),
DL.getTypeStoreSize(Ty));
return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index dd6958716127..b161c490a6bc 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1107,6 +1107,10 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
}
+bool llvm::isFinite(const Loop *L) {
+ return L->getHeader()->getParent()->willReturn();
+}
+
static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::hasMustProgress(const Loop *L) {
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
index 30937a2e4931..82617c7256a5 100644
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -59,8 +59,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- MaybeAlign(LI->getAlign()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(),
+ DL))
DerefAndAligned.insert(PO);
}
}
@@ -94,8 +94,8 @@ PreservedAnalyses MemDerefPrinterPass::run(Function &F,
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- MaybeAlign(LI->getAlign()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(),
+ DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 07aac1523b47..977fc0911355 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3486,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
return S;
}
-const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getAPInt().abs();
APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
@@ -7017,7 +7017,7 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
- return isMustProgress(L) && loopHasNoSideEffects(L);
+ return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
}
const SCEV *ScalarEvolution::createSCEV(Value *V) {
@@ -8466,8 +8466,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
Pred = ICmpInst::getSwappedPredicate(Pred);
}
+ bool ControllingFiniteLoop =
+ ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
// Simplify the operands before analyzing them.
- (void)SimplifyICmpOperands(Pred, LHS, RHS);
+ (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
+ ControllingFiniteLoop);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
@@ -8487,9 +8490,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
// the same values on self-wrap of the IV, then we can infer that IV
// doesn't self wrap because if it did, we'd have an infinite (undefined)
// loop.
- if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) &&
- loopIsFiniteByAssumption(L)) {
-
+ if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) {
// TODO: We can peel off any functions which are invertible *in L*. Loop
// invariant terms are effectively constants for our purposes here.
auto *InnerLHS = LHS;
@@ -9940,7 +9941,8 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
- unsigned Depth) {
+ unsigned Depth,
+ bool ControllingFiniteLoop) {
bool Changed = false;
// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
// '0 != 0'.
@@ -10069,10 +10071,15 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
- // adding or subtracting 1 from one of the operands.
+ // adding or subtracting 1 from one of the operands. This can be done for
+ // one of two reasons:
+ // 1) The range of the RHS does not include the (signed/unsigned) boundaries
+ // 2) The loop is finite, with this comparison controlling the exit. Since the
+ // loop is finite, the bound cannot include the corresponding boundary
+ // (otherwise it would loop forever).
switch (Pred) {
case ICmpInst::ICMP_SLE:
- if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
+ if (ControllingFiniteLoop || !getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
@@ -10085,7 +10092,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_SGE:
- if (!getSignedRangeMin(RHS).isMinSignedValue()) {
+ if (ControllingFiniteLoop || !getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
@@ -10098,7 +10105,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_ULE:
- if (!getUnsignedRangeMax(RHS).isMaxValue()) {
+ if (ControllingFiniteLoop || !getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
@@ -10110,7 +10117,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_UGE:
- if (!getUnsignedRangeMin(RHS).isMinValue()) {
+ if (ControllingFiniteLoop || !getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
@@ -10130,7 +10137,8 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
- return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1,
+ ControllingFiniteLoop);
return Changed;
}
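
A worked instance of the ICMP_ULE case, showing why the finite-loop justification is sound:

  // When this compare controls the exit of a finite loop:
  //   (i <=u N)  ==>  (i <u N + 1)
  // The rewrite is only wrong when N == UINT_MAX, but then `i <=u N` holds
  // for every i and the loop never exits, contradicting finiteness; so the
  // boundary case cannot occur under assumption (2) above.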
@@ -10911,7 +10919,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// For unsigned and equality predicates, try to prove that both found
// operands fit into narrow unsigned range. If so, try to prove facts in
// narrow types.
- if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy()) {
+ if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy() &&
+ !FoundRHS->getType()->isPointerTy()) {
auto *NarrowType = LHS->getType();
auto *WideType = FoundLHS->getType();
auto BitWidth = getTypeSizeInBits(NarrowType);
@@ -10929,7 +10938,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
}
- if (LHS->getType()->isPointerTy())
+ if (LHS->getType()->isPointerTy() || RHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(Pred)) {
LHS = getSignExtendExpr(LHS, FoundLHS->getType());
@@ -10940,7 +10949,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
} else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
- if (FoundLHS->getType()->isPointerTy())
+ if (FoundLHS->getType()->isPointerTy() || FoundRHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 23dbb32f38de..627a78a2a2fd 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -786,3 +786,36 @@ MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) {
}
return MDNode::get(MD->getContext(), Sub);
}
+
+MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
+ // Fast path if 0-length
+ if (Len == 0)
+ return nullptr;
+
+ // Regular TBAA is invariant of length, so we only need to consider
+ // struct-path TBAA.
+ if (!isStructPathTBAA(MD))
+ return MD;
+
+ TBAAStructTagNode Tag(MD);
+
+ // Only new format TBAA has a size
+ if (!Tag.isNewFormat())
+ return MD;
+
+ // If unknown size, drop the TBAA.
+ if (Len == -1)
+ return nullptr;
+
+ // Otherwise, create TBAA with the new Len
+ SmallVector<Metadata *, 4> NextNodes(MD->operands());
+ ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]);
+
+ // Don't create a new MDNode if it is the same length.
+ if (PreviousSize->equalsInt(Len))
+ return MD;
+
+ NextNodes[3] =
+ ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len));
+ return MDNode::get(MD->getContext(), NextNodes);
+}
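The control flow of extendToTBAA above reduces to a small decision ladder. Here it is as a self-contained sketch over plain values; ExtendResult, classifyExtend, and the boolean parameters are invented stand-ins for the MDNode queries:

#include <cstddef>

enum class ExtendResult { DropTBAA, KeepAsIs, RebuildWithNewLen };

ExtendResult classifyExtend(bool IsStructPath, bool IsNewFormat,
                            std::ptrdiff_t Len, std::ptrdiff_t PrevLen) {
  if (Len == 0)
    return ExtendResult::DropTBAA;        // zero-length access: no metadata
  if (!IsStructPath || !IsNewFormat)
    return ExtendResult::KeepAsIs;        // only new-format struct-path TBAA
                                          // carries a size operand
  if (Len == -1)
    return ExtendResult::DropTBAA;        // unknown size: be conservative
  if (PrevLen == Len)
    return ExtendResult::KeepAsIs;        // same length: reuse the node
  return ExtendResult::RebuildWithNewLen; // otherwise patch operand 3
}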
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 34358739f9a8..c14bdb8bc262 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4559,8 +4559,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(
- LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlign()), DL,
- CtxI, DT, TLI);
+ LI->getPointerOperand(), LI->getType(), LI->getAlign(), DL, CtxI, DT,
+ TLI);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 99d2c8221281..0d28d93c93c0 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -117,15 +117,28 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("image", true)
.Case("pipe", true)
.Case("queue", true)
+ .Case("hidden_block_count_x", true)
+ .Case("hidden_block_count_y", true)
+ .Case("hidden_block_count_z", true)
+ .Case("hidden_group_size_x", true)
+ .Case("hidden_group_size_y", true)
+ .Case("hidden_group_size_z", true)
+ .Case("hidden_remainder_x", true)
+ .Case("hidden_remainder_y", true)
+ .Case("hidden_remainder_z", true)
.Case("hidden_global_offset_x", true)
.Case("hidden_global_offset_y", true)
.Case("hidden_global_offset_z", true)
+ .Case("hidden_grid_dims", true)
.Case("hidden_none", true)
.Case("hidden_printf_buffer", true)
.Case("hidden_hostcall_buffer", true)
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
+ .Case("hidden_private_base", true)
+ .Case("hidden_shared_base", true)
+ .Case("hidden_queue_ptr", true)
.Default(false);
}))
return false;
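The verifier hunk only grows an allow-list of hidden kernel-argument kinds. A rough standard-library equivalent of the StringSwitch pattern, with an abbreviated list (illustrative only):

#include <string>
#include <unordered_set>

bool isValidHiddenArgKind(const std::string &Kind) {
  static const std::unordered_set<std::string> Allowed = {
      "hidden_block_count_x", "hidden_block_count_y", "hidden_block_count_z",
      "hidden_group_size_x",  "hidden_group_size_y",  "hidden_group_size_z",
      "hidden_remainder_x",   "hidden_remainder_y",   "hidden_remainder_z",
      "hidden_grid_dims",     "hidden_private_base",  "hidden_shared_base",
      "hidden_queue_ptr", // ...plus the pre-existing kinds elided here
  };
  return Allowed.count(Kind) != 0;
}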
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index eb4e09ea3a26..4bba0b356675 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4669,7 +4669,7 @@ void IndexBitcodeWriter::write() {
// where it will be written in a new bitcode block. This is used when
// writing the combined index file for ThinLTO. When writing a subset of the
// index for a distributed backend, provide a \p ModuleToSummariesForIndex map.
-void llvm::WriteIndexToFile(
+void llvm::writeIndexToFile(
const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
SmallVector<char, 0> Buffer;
@@ -4829,7 +4829,7 @@ void BitcodeWriter::writeThinLinkBitcode(const Module &M,
// Write the specified thin link bitcode file to the given raw output stream,
// where it will be written in a new bitcode block. This is used when
// writing the per-module index file for ThinLTO.
-void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
+void llvm::writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash) {
SmallVector<char, 0> Buffer;
@@ -4881,7 +4881,7 @@ static const char *getSectionNameForCommandline(const Triple &T) {
llvm_unreachable("Unimplemented ObjectFormatType");
}
-void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
+void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
bool EmbedBitcode, bool EmbedCmdline,
const std::vector<uint8_t> &CmdArgs) {
// Save llvm.compiler.used and remove it.
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e8fef505e43d..cdf5586766da 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -585,7 +585,7 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
// goes, they shouldn't affect whether the call is a tail call.
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull}) {
+ Attribute::NonNull, Attribute::NoUndef}) {
CallerAttrs.removeAttribute(Attr);
CalleeAttrs.removeAttribute(Attr);
}
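The Analysis.cpp hunk adds NoUndef to the set of attributes ignored for tail-call compatibility, since these describe value properties rather than ABI. A toy version of the strip-then-compare pattern, using std::set in place of AttributeList:

#include <set>
#include <string>

bool attrsCompatibleForTailCall(std::set<std::string> Caller,
                                std::set<std::string> Callee) {
  for (const char *Attr : {"align", "dereferenceable",
                           "dereferenceable_or_null", "noalias", "nonnull",
                           "noundef"}) {
    Caller.erase(Attr);
    Callee.erase(Attr);
  }
  return Caller == Callee; // remaining ABI-relevant attributes must match
}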
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4f3f798fe6f8..3e8e190eecc3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1647,8 +1647,18 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (IsFunction)
+ if (IsFunction) {
OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ OutStreamer->BeginCOFFSymbolDef(Name);
+ OutStreamer->EmitCOFFSymbolStorageClass(
+ GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+ }
emitVisibility(Name, GA.getVisibility());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 1a0256f30d41..396322c4979d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -314,8 +314,7 @@ unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams,
//===----------------------------------------------------------------------===//
// DIEUnit Implementation
//===----------------------------------------------------------------------===//
-DIEUnit::DIEUnit(dwarf::Tag UnitTag)
- : Die(UnitTag), Section(nullptr), Offset(0) {
+DIEUnit::DIEUnit(dwarf::Tag UnitTag) : Die(UnitTag) {
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_skeleton_unit ||
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index e36b7e2ae885..63343d2519f9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -33,8 +33,7 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {}
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A) {}
void DwarfCFIExceptionBase::markFunctionEnd() {
endFragment();
@@ -52,8 +51,7 @@ void DwarfCFIExceptionBase::endFragment() {
}
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
- forceEmitPersonality(false), shouldEmitLSDA(false) {}
+ : DwarfCFIExceptionBase(A) {}
DwarfCFIException::~DwarfCFIException() {}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 680b9586228f..609b568f28be 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3367,8 +3367,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Fast path if we're building some type units and one has already used the
// address pool, we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
- if (!TypeUnitsUnderConstruction.empty() &&
- (AddrPool.hasBeenUsed() || SeenLocalType))
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
@@ -3379,7 +3378,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- SeenLocalType = false;
auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
@@ -3423,7 +3421,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Types referencing entries in the address table cannot be placed in type
// units.
- if (AddrPool.hasBeenUsed() || SeenLocalType) {
+ if (AddrPool.hasBeenUsed()) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
@@ -3451,18 +3449,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)),
- AddrPoolUsed(DD->AddrPool.hasBeenUsed()),
- SeenLocalType(DD->SeenLocalType) {
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
DD->AddrPool.resetUsedFlag();
- DD->SeenLocalType = false;
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
DD->AddrPool.resetUsedFlag(AddrPoolUsed);
- DD->SeenLocalType = SeenLocalType;
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0043000652e8..4e1a1b1e068d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -433,7 +433,6 @@ private:
DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
AddressPool AddrPool;
- bool SeenLocalType = false;
/// Accelerator tables.
AccelTable<DWARF5AccelTableData> AccelDebugNames;
@@ -672,7 +671,6 @@ public:
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
bool AddrPoolUsed;
- bool SeenLocalType;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
@@ -681,7 +679,6 @@ public:
};
NonTypeUnitContext enterNonTypeUnitContext();
- void seenLocalType() { SeenLocalType = true; }
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 4defa8a30855..e5cda4739fde 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -26,9 +26,9 @@ protected:
DwarfCFIExceptionBase(AsmPrinter *A);
/// Per-function flag to indicate if frame CFI info should be emitted.
- bool shouldEmitCFI;
+ bool shouldEmitCFI = false;
/// Per-module flag to indicate if .cfi_section has been emitted.
- bool hasEmittedCFISections;
+ bool hasEmittedCFISections = false;
void markFunctionEnd() override;
void endFragment() override;
@@ -36,13 +36,13 @@ protected:
class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if .cfi_personality should be emitted.
- bool shouldEmitPersonality;
+ bool shouldEmitPersonality = false;
/// Per-function flag to indicate if .cfi_personality must be emitted.
- bool forceEmitPersonality;
+ bool forceEmitPersonality = false;
/// Per-function flag to indicate if .cfi_lsda should be emitted.
- bool shouldEmitLSDA;
+ bool shouldEmitLSDA = false;
public:
//===--------------------------------------------------------------------===//
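Several hunks in this commit, including the DwarfException.h one above, are the same NFC modernization: flag defaults move from constructor init-lists to in-class default member initializers. A generic before/after sketch:

struct Before {
  bool ShouldEmit;
  bool HasEmitted;
  Before() : ShouldEmit(false), HasEmitted(false) {}
};

struct After {
  bool ShouldEmit = false; // defaults live next to the declarations,
  bool HasEmitted = false; // so new constructors can't forget them
  After() = default;
};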
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ee932d105107..fe438102ee98 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -287,9 +287,17 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// expression representing a value, rather than a location.
if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) ||
isEntryValue()) {
+ auto FragmentInfo = ExprCursor.getFragmentInfo();
+ unsigned RegSize = 0;
for (auto &Reg : DwarfRegs) {
+ RegSize += Reg.SubRegSize;
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
+ if (FragmentInfo)
+ if (RegSize > FragmentInfo->SizeInBits)
+ // If the register is larger than the current fragment, stop once
+ // the fragment is covered.
+ break;
addOpPiece(Reg.SubRegSize);
}
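The DwarfExpression change stops emitting piece operations once the accumulated register size covers the fragment. A standalone model of that loop; the piece representation and names are invented:

#include <cstdio>
#include <vector>

void emitPieces(const std::vector<unsigned> &SubRegSizes,
                unsigned FragmentSizeInBits) {
  unsigned Covered = 0;
  for (unsigned Size : SubRegSizes) {
    Covered += Size;
    if (Covered > FragmentSizeInBits)
      break; // register is larger than the fragment: skip the excess piece
    std::printf("DW_OP_piece %u\n", Size);
  }
}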
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 15d90c54adfc..5a2bd479f277 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -89,8 +89,7 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(nullptr) {}
+ : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -597,8 +596,10 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else
+ else {
+ auto X = DD->enterNonTypeUnitContext();
finishNonUnitTypeDIE(TyDIE, CTy);
+ }
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
@@ -1852,23 +1853,5 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addString(D, dwarf::DW_AT_name, Name);
if (Name.startswith("_STN") || !Name.contains('<'))
addTemplateParams(D, CTy->getTemplateParams());
- // If the type is in an anonymous namespace, we can't reference it from a TU
- // (since the type would be CU local and the TU doesn't specify which TU has
- // the appropriate type definition) - so flag this emission as such and skip
- // the rest of the emission now since we're going to throw out all this work
- // and put the outer/referencing type in the CU instead.
- // FIXME: Probably good to generalize this to a DICompositeType flag populated
- // by the frontend, then we could use that to have types that can have
- // decl+def merged by LTO but where the definition still doesn't go in a type
- // unit because the type has only one definition.
- for (DIScope *S = CTy->getScope(); S; S = S->getScope()) {
- if (auto *NS = dyn_cast<DINamespace>(S)) {
- if (NS->getName().empty()) {
- DD->seenLocalType();
- break;
- }
- }
- }
- auto X = DD->enterNonTypeUnitContext();
getCU().createTypeDIE(CTy);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 330f3bacca43..48d63d126701 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -51,7 +51,7 @@ protected:
DwarfFile *DU;
/// An anonymous type for index type. Owned by DIEUnit.
- DIE *IndexTyDie;
+ DIE *IndexTyDie = nullptr;
/// Tracks the mapping of unit level debug information variables to debug
/// information entries.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 28f24e5ea908..c888adeafca5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3446,7 +3446,7 @@ private:
bool AllAddrModesTrivial = true;
/// Common Type for all different fields in addressing modes.
- Type *CommonType;
+ Type *CommonType = nullptr;
/// SimplifyQuery for simplifyInstruction utility.
const SimplifyQuery &SQ;
@@ -3456,7 +3456,7 @@ private:
public:
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
- : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
+ : SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const {
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 0b5469b02637..6a0da4dad3c1 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -111,12 +111,11 @@ public:
/// Information about each phi in the Tail block.
struct PHIInfo {
MachineInstr *PHI;
- unsigned TReg, FReg;
+ unsigned TReg = 0, FReg = 0;
// Latencies from Cond+Branch, TReg, and FReg to DstReg.
- int CondCycles, TCycles, FCycles;
+ int CondCycles = 0, TCycles = 0, FCycles = 0;
- PHIInfo(MachineInstr *phi)
- : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ PHIInfo(MachineInstr *phi) : PHI(phi) {}
};
SmallVector<PHIInfo, 8> PHIs;
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index d0c2b8c267ff..60ee1812ee2c 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -70,8 +70,8 @@ class MemCmpExpansion {
CallInst *const CI;
ResultBlock ResBlock;
const uint64_t Size;
- unsigned MaxLoadSize;
- uint64_t NumLoadsNonOneByte;
+ unsigned MaxLoadSize = 0;
+ uint64_t NumLoadsNonOneByte = 0;
const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
BasicBlock *EndBlock;
@@ -219,8 +219,7 @@ MemCmpExpansion::MemCmpExpansion(
const TargetTransformInfo::MemCmpExpansionOptions &Options,
const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
DomTreeUpdater *DTU)
- : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
- NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
Builder(CI) {
assert(Size > 0 && "zero blocks");
diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 727d33fe4a40..6271a4514c27 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -64,7 +64,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) {
return OS;
}
-LegacyLegalizerInfo::LegacyLegalizerInfo() : TablesInitialized(false) {
+LegacyLegalizerInfo::LegacyLegalizerInfo() {
// Set defaults.
// FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
// fundamental load/store Jakob proposed. Once loads & stores are supported.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 681e2f3dc848..1b20d1da20ad 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -1211,11 +1211,11 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
void IfConverter::AnalyzeBlock(
MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {
- BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {}
+ BBState(MachineBasicBlock &MBB) : MBB(&MBB) {}
MachineBasicBlock *MBB;
/// This flag is true if MBB's successors have been analyzed.
- bool SuccsAnalyzed;
+ bool SuccsAnalyzed = false;
};
// Push MBB to the stack.
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 2ee9379cb286..230c6846dde2 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -656,10 +656,10 @@ public:
};
/// Basic-block the load instructions are within
- BasicBlock *BB;
+ BasicBlock *BB = nullptr;
/// Pointer value of all participation load instructions
- Value *PV;
+ Value *PV = nullptr;
/// Participating load instructions
std::set<LoadInst *> LIs;
@@ -668,7 +668,7 @@ public:
std::set<Instruction *> Is;
/// Final shuffle-vector instruction
- ShuffleVectorInst *SVI;
+ ShuffleVectorInst *SVI = nullptr;
/// Information of the offset for each vector element
ElementInfo *EI;
@@ -676,8 +676,7 @@ public:
/// Vector Type
FixedVectorType *const VTy;
- VectorInfo(FixedVectorType *VTy)
- : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) {
+ VectorInfo(FixedVectorType *VTy) : VTy(VTy) {
EI = new ElementInfo[VTy->getNumElements()];
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 8a190e769941..0eb6100230bd 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -274,6 +274,13 @@ public:
// Map of the preferred location for each value.
DenseMap<ValueIDNum, LocIdx> ValueToLoc;
+
+ // Initialize the preferred-location map with illegal locations, to be
+ // filled in later.
+ for (auto &VLoc : VLocs)
+ if (VLoc.second.Kind == DbgValue::Def)
+ ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()});
+
ActiveMLocs.reserve(VLocs.size());
ActiveVLocs.reserve(VLocs.size());
@@ -285,21 +292,20 @@ public:
ValueIDNum &VNum = MLocs[Idx.asU64()];
VarLocs.push_back(VNum);
- // Short-circuit unnecessary preferred location update.
- if (VLocs.empty())
+ // Is there a variable that wants a location for this value? If not, skip.
+ auto VIt = ValueToLoc.find(VNum);
+ if (VIt == ValueToLoc.end())
continue;
- auto it = ValueToLoc.find(VNum);
+ LocIdx CurLoc = VIt->second;
// In order of preference, pick:
// * Callee saved registers,
// * Other registers,
// * Spill slots.
- if (it == ValueToLoc.end() || MTracker->isSpill(it->second) ||
- (!isCalleeSaved(it->second) && isCalleeSaved(Idx.asU64()))) {
+ if (CurLoc.isIllegal() || MTracker->isSpill(CurLoc) ||
+ (!isCalleeSaved(CurLoc) && isCalleeSaved(Idx.asU64()))) {
// Insert, or overwrite if insertion failed.
- auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx));
- if (!PrefLocRes.second)
- PrefLocRes.first->second = Idx;
+ VIt->second = Idx;
}
}
@@ -314,7 +320,7 @@ public:
// If the value has no location, we can't make a variable location.
const ValueIDNum &Num = Var.second.ID;
auto ValuesPreferredLoc = ValueToLoc.find(Num);
- if (ValuesPreferredLoc == ValueToLoc.end()) {
+ if (ValuesPreferredLoc->second.isIllegal()) {
// If it's a def that occurs in this block, register it as a
// use-before-def to be resolved as we step through the block.
if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
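The TransferTracker hunks above replace insert-while-scanning with a pre-seeded map: every value a variable wants is first mapped to an illegal sentinel, so the scan over locations only updates existing entries. An illustrative standalone version (the real code also ranks callee-saved registers above spills):

#include <unordered_map>
#include <vector>

constexpr int kIllegalLoc = -1;

// ValueHeldAt[i] is the value resident in location i; Wanted lists the values
// some variable needs a location for.
std::unordered_map<int, int>
pickLocations(const std::vector<int> &Wanted,
              const std::vector<int> &ValueHeldAt) {
  std::unordered_map<int, int> ValueToLoc;
  for (int V : Wanted)                 // pre-seed with sentinels
    ValueToLoc.emplace(V, kIllegalLoc);
  for (int Loc = 0; Loc != (int)ValueHeldAt.size(); ++Loc) {
    auto It = ValueToLoc.find(ValueHeldAt[Loc]);
    if (It == ValueToLoc.end())
      continue;                        // nobody wants this value: skip early
    if (It->second == kIllegalLoc)     // first acceptable location wins here
      It->second = Loc;
  }
  return ValueToLoc;
}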
@@ -1374,18 +1380,20 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Look for any clobbers performed by a register mask. Only test locations
// that are actually being tracked.
- for (auto L : MTracker->locations()) {
- // Stack locations can't be clobbered by regmasks.
- if (MTracker->isSpill(L.Idx))
- continue;
+ if (!RegMaskPtrs.empty()) {
+ for (auto L : MTracker->locations()) {
+ // Stack locations can't be clobbered by regmasks.
+ if (MTracker->isSpill(L.Idx))
+ continue;
- Register Reg = MTracker->LocIdxToLocID[L.Idx];
- if (IgnoreSPAlias(Reg))
- continue;
+ Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
- for (auto *MO : RegMaskPtrs)
- if (MO->clobbersPhysReg(Reg))
- TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ for (auto *MO : RegMaskPtrs)
+ if (MO->clobbersPhysReg(Reg))
+ TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ }
}
// Tell TTracker about any folded stack store.
@@ -2212,40 +2220,6 @@ void InstrRefBasedLDV::buildMLocValueMap(
// redundant PHIs.
}
-// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
-// template specialisations for graph traits and a successor enumerator.
-namespace llvm {
-template <> struct GraphTraits<MachineBasicBlock> {
- using NodeRef = MachineBasicBlock *;
- using ChildIteratorType = MachineBasicBlock::succ_iterator;
-
- static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
- static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
-};
-
-template <> struct GraphTraits<const MachineBasicBlock> {
- using NodeRef = const MachineBasicBlock *;
- using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
-
- static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
- static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
-};
-
-using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
-using MachineDomTreeChildGetter =
- typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
-
-namespace IDFCalculatorDetail {
-template <>
-typename MachineDomTreeChildGetter::ChildrenTy
-MachineDomTreeChildGetter::get(const NodeRef &N) {
- return {N->succ_begin(), N->succ_end()};
-}
-} // namespace IDFCalculatorDetail
-} // namespace llvm
-
void InstrRefBasedLDV::BlockPHIPlacement(
const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
@@ -2253,8 +2227,7 @@ void InstrRefBasedLDV::BlockPHIPlacement(
// Apply IDF calculator to the designated set of location defs, storing
// required PHIs into PHIBlocks. Uses the dominator tree stored in the
// InstrRefBasedLDV object.
- IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo;
- IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo);
+ IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree->getBase());
IDF.setLiveInBlocks(AllBlocks);
IDF.setDefiningBlocks(DefBlocks);
@@ -2465,8 +2438,71 @@ bool InstrRefBasedLDV::vlocJoin(
}
}
-void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+void InstrRefBasedLDV::getBlocksForScope(
+ const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &BlocksToExplore,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks) {
+ // Get the set of "normal" in-lexical-scope blocks.
+ LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
+
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+ // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+
+ // Storage for artificial blocks we intend to add to BlocksToExplore.
+ DenseSet<const MachineBasicBlock *> ToAdd;
+
+ // To avoid needlessly dropping large volumes of variable locations, propagate
+ // variables through aritifical blocks, i.e. those that don't have any
+ // instructions in scope at all. To accurately replicate VarLoc
+ // LiveDebugValues, this means exploring all artificial successors too.
+ // Perform a depth-first-search to enumerate those blocks.
+ for (auto *MBB : BlocksToExplore) {
+ // Depth-first-search state: each node is a block and which successor
+ // we're currently exploring.
+ SmallVector<std::pair<const MachineBasicBlock *,
+ MachineBasicBlock::const_succ_iterator>,
+ 8>
+ DFS;
+
+ // Find any artificial successors not already tracked.
+ for (auto *succ : MBB->successors()) {
+ if (BlocksToExplore.count(succ))
+ continue;
+ if (!ArtificialBlocks.count(succ))
+ continue;
+ ToAdd.insert(succ);
+ DFS.push_back({succ, succ->succ_begin()});
+ }
+
+ // Search all those blocks, depth first.
+ while (!DFS.empty()) {
+ const MachineBasicBlock *CurBB = DFS.back().first;
+ MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
+ // Walk back if we've explored this block's successors to the end.
+ if (CurSucc == CurBB->succ_end()) {
+ DFS.pop_back();
+ continue;
+ }
+
+ // If the current successor is artificial and unexplored, descend into
+ // it.
+ if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
+ ToAdd.insert(*CurSucc);
+ DFS.push_back({*CurSucc, (*CurSucc)->succ_begin()});
+ continue;
+ }
+
+ ++CurSucc;
+ }
+ };
+
+ BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
+}
+
+void InstrRefBasedLDV::buildVLocValueMap(
+ const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
@@ -2490,74 +2526,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
return BBToOrder[A] < BBToOrder[B];
};
- LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
-
- // A separate container to distinguish "blocks we're exploring" versus
- // "blocks that are potentially in scope. See comment at start of vlocJoin.
- SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
-
- // VarLoc LiveDebugValues tracks variable locations that are defined in
- // blocks not in scope. This is something we could legitimately ignore, but
- // lets allow it for now for the sake of coverage.
- BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
-
- // We also need to propagate variable values through any artificial blocks
- // that immediately follow blocks in scope.
- DenseSet<const MachineBasicBlock *> ToAdd;
-
- // Helper lambda: For a given block in scope, perform a depth first search
- // of all the artificial successors, adding them to the ToAdd collection.
- auto AccumulateArtificialBlocks =
- [this, &ToAdd, &BlocksToExplore,
- &InScopeBlocks](const MachineBasicBlock *MBB) {
- // Depth-first-search state: each node is a block and which successor
- // we're currently exploring.
- SmallVector<std::pair<const MachineBasicBlock *,
- MachineBasicBlock::const_succ_iterator>,
- 8>
- DFS;
-
- // Find any artificial successors not already tracked.
- for (auto *succ : MBB->successors()) {
- if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ))
- continue;
- if (!ArtificialBlocks.count(succ))
- continue;
- ToAdd.insert(succ);
- DFS.push_back(std::make_pair(succ, succ->succ_begin()));
- }
-
- // Search all those blocks, depth first.
- while (!DFS.empty()) {
- const MachineBasicBlock *CurBB = DFS.back().first;
- MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
- // Walk back if we've explored this blocks successors to the end.
- if (CurSucc == CurBB->succ_end()) {
- DFS.pop_back();
- continue;
- }
-
- // If the current successor is artificial and unexplored, descend into
- // it.
- if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
- ToAdd.insert(*CurSucc);
- DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
- continue;
- }
-
- ++CurSucc;
- }
- };
-
- // Search in-scope blocks and those containing a DBG_VALUE from this scope
- // for artificial successors.
- for (auto *MBB : BlocksToExplore)
- AccumulateArtificialBlocks(MBB);
- for (auto *MBB : InScopeBlocks)
- AccumulateArtificialBlocks(MBB);
-
- BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
- InScopeBlocks.insert(ToAdd.begin(), ToAdd.end());
+ getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks);
// Single block scope: not interesting! No propagation at all. Note that
// this could probably go above ArtificialBlocks without damage, but
@@ -2628,7 +2597,15 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
SmallVector<MachineBasicBlock *, 32> PHIBlocks;
- // Request the set of PHIs we should insert for this variable.
+ // Request the set of PHIs we should insert for this variable. If there's
+ // only one value definition, things are very simple.
+ if (DefBlocks.size() == 1) {
+ placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(),
+ AllTheVLocs, Var, Output);
+ continue;
+ }
+
+ // Otherwise: we need to place PHIs through SSA and propagate values.
BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
// Insert PHIs into the per-block live-in tables for this variable.
@@ -2769,6 +2746,39 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
BlocksToExplore.clear();
}
+void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output) {
+ // If there is a single definition of the variable, then working out its
+ // value everywhere is simple: it holds in every block dominated by the
+ // definition. At the dominance frontier, the usual algorithm would:
+ // * Place PHIs,
+ // * Propagate values into them,
+ // * Find there's no incoming variable value from the other incoming branches
+ // of the dominance frontier,
+ // * Specify there's no variable value in blocks past the frontier.
+ // This is a common case, so it's worth special-casing.
+
+ // Pick out the variable's value from the block transfer function.
+ VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()];
+ auto ValueIt = VLocs.Vars.find(Var);
+ const DbgValue &Value = ValueIt->second;
+
+ // Assign the variable value on entry to each dominated block that's in scope.
+ // Skip the definition block -- it's assigned the variable value in the middle
+ // of the block somewhere.
+ for (auto *ScopeBlock : InScopeBlocks) {
+ if (!DomTree->properlyDominates(AssignMBB, ScopeBlock))
+ continue;
+
+ Output[ScopeBlock->getNumber()].push_back({Var, Value});
+ }
+
+ // All blocks that aren't dominated have no live-in value, thus no variable
+ // value will be given to them.
+}
+
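The fast path above leans entirely on properlyDominates. As a reminder of what that query does, a toy implementation over a dominator tree stored as an immediate-dominator array (LLVM of course uses MachineDominatorTree):

#include <vector>

bool properlyDominates(int A, int B, const std::vector<int> &IDom) {
  // Walk B's immediate-dominator chain; A properly dominates B iff we meet A
  // strictly above B. The root is its own immediate dominator.
  while (B != IDom[B]) {
    B = IDom[B];
    if (B == A)
      return true;
  }
  return false;
}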
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void InstrRefBasedLDV::dump_mloc_transfer(
const MLocTransferMap &mloc_transfer) const {
@@ -2806,39 +2816,7 @@ void InstrRefBasedLDV::emitLocations(
}
}
- // Go through all the transfers recorded in the TransferTracker -- this is
- // both the live-ins to a block, and any movements of values that happen
- // in the middle.
- for (const auto &P : TTracker->Transfers) {
- // We have to insert DBG_VALUEs in a consistent order, otherwise they
- // appear in DWARF in different orders. Use the order that they appear
- // when walking through each block / each instruction, stored in
- // AllVarsNumbering.
- SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
- for (MachineInstr *MI : P.Insts) {
- DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
- MI->getDebugLoc()->getInlinedAt());
- Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
- }
- llvm::sort(Insts,
- [](const auto &A, const auto &B) { return A.first < B.first; });
-
- // Insert either before or after the designated point...
- if (P.MBB) {
- MachineBasicBlock &MBB = *P.MBB;
- for (const auto &Pair : Insts)
- MBB.insert(P.Pos, Pair.second);
- } else {
- // Terminators, like tail calls, can clobber things. Don't try and place
- // transfers after them.
- if (P.Pos->isTerminator())
- continue;
-
- MachineBasicBlock &MBB = *P.Pos->getParent();
- for (const auto &Pair : Insts)
- MBB.insertAfterBundle(P.Pos, Pair.second);
- }
- }
+ emitTransfers(AllVarsNumbering);
}
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
@@ -2883,6 +2861,45 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
#endif
}
+bool InstrRefBasedLDV::emitTransfers(
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ // Go through all the transfers recorded in the TransferTracker -- this is
+ // both the live-ins to a block, and any movements of values that happen
+ // in the middle.
+ for (const auto &P : TTracker->Transfers) {
+ // We have to insert DBG_VALUEs in a consistent order, otherwise they
+ // appear in DWARF in different orders. Use the order that they appear
+ // when walking through each block / each instruction, stored in
+ // AllVarsNumbering.
+ SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
+ for (MachineInstr *MI : P.Insts) {
+ DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
+ }
+ llvm::sort(Insts,
+ [](const auto &A, const auto &B) { return A.first < B.first; });
+
+ // Insert either before or after the designated point...
+ if (P.MBB) {
+ MachineBasicBlock &MBB = *P.MBB;
+ for (const auto &Pair : Insts)
+ MBB.insert(P.Pos, Pair.second);
+ } else {
+ // Terminators, like tail calls, can clobber things. Don't try and place
+ // transfers after them.
+ if (P.Pos->isTerminator())
+ continue;
+
+ MachineBasicBlock &MBB = *P.Pos->getParent();
+ for (const auto &Pair : Insts)
+ MBB.insertAfterBundle(P.Pos, Pair.second);
+ }
+ }
+
+ return TTracker->Transfers.size() != 0;
+}
+
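emitTransfers keeps DBG_VALUE emission deterministic by sorting pending instructions by their AllVarsNumbering index before insertion. Stripped of MachineInstr, the ordering step is just a keyed sort:

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

void orderForEmission(std::vector<std::pair<unsigned, std::string>> &Insts) {
  // Pair: (AllVarsNumbering index, textual stand-in for a DBG_VALUE).
  std::sort(Insts.begin(), Insts.end(),
            [](const auto &A, const auto &B) { return A.first < B.first; });
}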
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
@@ -2989,14 +3006,14 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
DenseMap<DebugVariable, unsigned> AllVarsNumbering;
// Map from one LexicalScope to all the variables in that scope.
- DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars;
+ ScopeToVarsT ScopeToVars;
- // Map from One lexical scope to all blocks in that scope.
- DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>
- ScopeToBlocks;
+ // Map from one lexical scope to all blocks where assignments happen for
+ // that scope.
+ ScopeToAssignBlocksT ScopeToAssignBlocks;
- // Store a DILocation that describes a scope.
- DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation;
+ // Store a map of DILocations that describe scopes.
+ ScopeToDILocT ScopeToDILocation;
// To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
// the order is unimportant, it just has to be stable.
@@ -3016,7 +3033,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
ScopeToVars[Scope].insert(Var);
- ScopeToBlocks[Scope].insert(VTracker->MBB);
+ ScopeToAssignBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
++VarAssignCount;
}
@@ -3040,7 +3057,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// a map of variables to values in SavedLiveIns.
for (auto &P : ScopeToVars) {
buildVLocValueMap(ScopeToDILocation[P.first], P.second,
- ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
vlocs);
}
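The successor walk shared by getBlocksForScope and the removed lambda is an iterative DFS with an explicit (node, next-successor) stack. The same shape, standalone, over an adjacency list; IsArtificial is an assumed predicate:

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

void collectArtificialSuccessors(const std::vector<std::vector<int>> &Succs,
                                 const std::vector<bool> &IsArtificial,
                                 int Start, std::set<int> &ToAdd) {
  std::vector<std::pair<int, size_t>> DFS; // (block, index of next successor)
  DFS.push_back({Start, 0});
  while (!DFS.empty()) {
    auto &[CurBB, NextIdx] = DFS.back();
    if (NextIdx == Succs[CurBB].size()) { // all successors explored: walk back
      DFS.pop_back();
      continue;
    }
    int Succ = Succs[CurBB][NextIdx++];
    if (!ToAdd.count(Succ) && IsArtificial[Succ]) {
      ToAdd.insert(Succ);
      DFS.push_back({Succ, 0}); // descend into unexplored artificial block
    }
  }
}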
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 9e9c0ce394fd..e7383209c027 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -779,6 +779,17 @@ public:
/// Used as the result type for the variable value dataflow problem.
using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+ /// Mapping from lexical scopes to a DILocation in that scope.
+ using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>;
+
+ /// Mapping from lexical scopes to variables in that scope.
+ using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>;
+
+ /// Mapping from lexical scopes to blocks where variables in that scope are
+ /// assigned. Such blocks aren't necessarily "in" the lexical scope; they're
+ /// just blocks where an assignment happens.
+ using ScopeToAssignBlocksT = DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>;
+
private:
MachineDominatorTree *DomTree;
const TargetRegisterInfo *TRI;
@@ -816,7 +827,7 @@ private:
/// Blocks which are artificial, i.e. blocks which exclusively contain
/// instructions without DebugLocs, or with line 0 locations.
- SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+ SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks;
// Mapping of blocks to and from their RPOT order.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
@@ -958,6 +969,15 @@ private:
ValueIDNum **MInLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+ /// Propagate variable values to blocks in the common case where there's
+ /// only one value assigned to the variable. This function has better
+ /// performance as it doesn't have to find the dominance frontier between
+ /// different assignments.
+ void placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output);
+
/// Calculate the iterated-dominance-frontier for a set of defs, using the
/// existing LLVM facilities for this. Works for a single "value" or
/// machine/variable location.
@@ -979,6 +999,19 @@ private:
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
ValueIDNum **OutLocs, ValueIDNum *InLocs);
+ /// Produce a set of blocks that are in the current lexical scope. This means
+ /// those blocks that contain instructions "in" the scope, blocks where
+ /// assignments to variables in scope occur, and artificial blocks that are
+ /// successors to any of the earlier blocks. See https://llvm.org/PR48091 for
+ /// more commentary on what "in scope" means.
+ /// \p DILoc A location in the scope that we're fetching blocks for.
+ /// \p Output Set to put in-scope-blocks into.
+ /// \p AssignBlocks Blocks known to contain assignments of variables in scope.
+ void
+ getBlocksForScope(const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Output,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks);
+
/// Solve the variable value dataflow problem, for a single lexical scope.
/// Uses the algorithm from the file comment to resolve control flow joins
/// using PHI placement and value propagation. Reads the locations of machine
@@ -1029,6 +1062,12 @@ private:
DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
const TargetPassConfig &TPC);
+ /// Take collections of DBG_VALUE instructions stored in TTracker, and
+ /// install them into their output blocks. Preserves a stable order of
+ /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
+ /// the AllVarsNumbering order.
+ bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+
+ /// Boilerplate computation of some initial sets, artificial blocks and
/// RPOT block ordering.
void initialSetup(MachineFunction &MF);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index b4dd41bbb810..42a0967bce3f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -329,7 +329,7 @@ private:
EntryValueKind,
EntryValueBackupKind,
EntryValueCopyBackupKind
- } EVKind;
+ } EVKind = EntryValueLocKind::NonEntryValueKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
@@ -397,8 +397,7 @@ private:
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
- Expr(MI.getDebugExpression()), MI(MI),
- EVKind(EntryValueLocKind::NonEntryValueKind) {
+ Expr(MI.getDebugExpression()), MI(MI) {
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert((MI.isDebugValueList() || MI.getNumOperands() == 4) &&
"malformed DBG_VALUE");
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index a74c57690640..33782c755eb0 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -220,6 +220,19 @@ void resetInputs(MLModelRunner &Runner) {
#undef _RESET
}
+// Per-live interval components that get aggregated into the feature values that
+// will be passed to the evaluator.
+struct LIFeatureComponents {
+ double R = 0;
+ double W = 0;
+ double RW = 0;
+ double IndVarUpdates = 0;
+ double HintWeights = 0.0;
+ int64_t NrDefsAndUses = 0;
+ float HottestBlockFreq = 0.0;
+ bool IsRemat = false;
+};
+
using CandidateRegList =
std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
@@ -227,8 +240,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
public:
- MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
- MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
+ MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops);
protected:
@@ -277,6 +290,9 @@ private:
FixedRegisters);
}
+ const LIFeatureComponents
+ getLIFeatureComponents(const LiveInterval &LI) const;
+
// Hold on to a default advisor for:
// 1) the implementation of canEvictHintInterference, because we didn't learn
// that nuance yet;
@@ -319,7 +335,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
if (!Runner)
Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
MF.getFunction().getContext(), FeatureNames, DecisionName);
@@ -364,7 +380,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
- DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops, Logger *Log)
@@ -420,7 +436,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
LLVMContext &Ctx = MF.getFunction().getContext();
if (ModelUnderTraining.empty() && TrainingLog.empty()) {
Ctx.emitError("Regalloc development mode should be requested with at "
@@ -480,7 +496,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
return Ret;
}
-MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops)
@@ -615,16 +631,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I, ++Pos) {
MCRegister PhysReg = *I;
- Regs[Pos] = std::make_pair(PhysReg, true);
+ assert(!Regs[Pos].second);
assert(PhysReg);
if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
- Regs[Pos].second = false;
continue;
}
if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
Largest, Pos)) {
++Available;
- Regs[Pos].second = true;
+ Regs[Pos] = std::make_pair(PhysReg, true);
}
}
if (Available == 0) {
@@ -632,6 +647,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
assert(!MustFindEviction);
return MCRegister::NoRegister;
}
+ const size_t ValidPosLimit = Pos;
// If we must find eviction, the candidate should be masked out of the
// decision making process.
Regs[CandidateVirtRegPos].second = !MustFindEviction;
@@ -665,9 +681,55 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
assert(!MustFindEviction);
return MCRegister::NoRegister;
}
+ assert(CandidatePos < ValidPosLimit);
+ (void)ValidPosLimit;
return Regs[CandidatePos].first;
}
+const LIFeatureComponents
+MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
+ LIFeatureComponents Ret;
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = MRI->reg_instr_nodbg_begin(LI.reg()),
+ E = MRI->reg_instr_nodbg_end();
+ I != E;) {
+ MachineInstr *MI = &*(I++);
+
+ ++Ret.NrDefsAndUses;
+ if (!Visited.insert(MI).second)
+ continue;
+
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
+ continue;
+
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+
+ float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
+ Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);
+
+ Ret.R += (Reads && !Writes) * Freq;
+ Ret.W += (!Reads && Writes) * Freq;
+ Ret.RW += (Reads && Writes) * Freq;
+
+ auto *MBB = MI->getParent();
+ auto *Loop = Loops.getLoopFor(MBB);
+ bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
+
+ if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
+ Ret.IndVarUpdates += Freq;
+
+ if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
+ Ret.HintWeights += Freq;
+ }
+ Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
+ LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ return Ret;
+}
+
// Overall, this currently mimics what we do for weight calculation, but instead
+// of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
@@ -676,11 +738,11 @@ void MLEvictAdvisor::extractFeatures(
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
int64_t NrDefsAndUses = 0;
int64_t NrBrokenHints = 0;
- float R = 0;
- float W = 0;
- float RW = 0;
- float IndVarUpdates = 0;
- float HintWeights = 0.0;
+ double R = 0.0;
+ double W = 0.0;
+ double RW = 0.0;
+ double IndVarUpdates = 0.0;
+ double HintWeights = 0.0;
float StartBBFreq = 0.0;
float EndBBFreq = 0.0;
float HottestBlockFreq = 0.0;
@@ -707,46 +769,19 @@ void MLEvictAdvisor::extractFeatures(
if (LI.endIndex() > EndSI)
EndSI = LI.endIndex();
-
- SmallPtrSet<MachineInstr *, 8> Visited;
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const LIFeatureComponents LIFC = getLIFeatureComponents(LI);
NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
- for (MachineRegisterInfo::reg_instr_nodbg_iterator
- I = MRI->reg_instr_nodbg_begin(LI.reg()),
- E = MRI->reg_instr_nodbg_end();
- I != E;) {
- MachineInstr *MI = &*(I++);
+ NrDefsAndUses += LIFC.NrDefsAndUses;
+ HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
+ R += LIFC.R;
+ W += LIFC.W;
+ RW += LIFC.RW;
- ++NrDefsAndUses;
- if (!Visited.insert(MI).second)
- continue;
+ IndVarUpdates += LIFC.IndVarUpdates;
- if (MI->isIdentityCopy() || MI->isImplicitDef())
- continue;
-
- bool Reads, Writes;
- std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
-
- float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
- if (Freq > HottestBlockFreq)
- HottestBlockFreq = Freq;
- R += (Reads && !Writes) * Freq;
- W += (!Reads && Writes) * Freq;
- RW += (Reads && Writes) * Freq;
-
- auto *MBB = MI->getParent();
- auto *Loop = Loops.getLoopFor(MBB);
- bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
-
- if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
- IndVarUpdates += Freq;
-
- if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
- HintWeights += Freq;
- }
- NrRematerializable += VirtRegAuxInfo::isRematerializable(
- LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ HintWeights += LIFC.HintWeights;
+ NrRematerializable += LIFC.IsRemat;
}
size_t Size = 0;
if (!Intervals.empty()) {
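The MLRegallocEvictAdvisor refactor splits feature extraction in two: a per-live-interval computation returning an LIFeatureComponents, and an aggregation loop that sums or maxes the components. The shape of that split, with invented field and function names:

#include <algorithm>
#include <vector>

struct Components {       // computed once per item
  double R = 0, W = 0;
  float Hottest = 0.0f;
};

Components computeComponents(int Item) {
  // ...the per-item analysis would live here; elided in this sketch.
  return {double(Item), double(Item) * 0.5, float(Item)};
}

Components aggregate(const std::vector<int> &Items) {
  Components Total;
  for (int I : Items) {
    Components C = computeComponents(I);
    Total.R += C.R;      // sums for frequency-weighted counters
    Total.W += C.W;
    Total.Hottest = std::max(Total.Hottest, C.Hottest); // max for peaks
  }
  return Total;
}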
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 50cbb14e926e..31d4fc7d02bf 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -400,12 +400,14 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
// FIXME: Do this for new pass manager.
LLVMContext &Ctx = M.getContext();
MMI.getContext().setDiagnosticHandler(
- [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr,
- std::vector<const MDNode *> &LocInfos) {
+ [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm,
+ const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
unsigned LocCookie = 0;
if (IsInlineAsm)
LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
- Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie));
+ Ctx.diagnose(
+ DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
return false;
diff --git a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
index e4da179efcc4..aa63411df965 100644
--- a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
+++ b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
@@ -66,8 +66,7 @@ MachineModuleSlotTracker::MachineModuleSlotTracker(
const MachineFunction *MF, bool ShouldInitializeAllMetadata)
: ModuleSlotTracker(MF->getFunction().getParent(),
ShouldInitializeAllMetadata),
- TheFunction(MF->getFunction()), TheMMI(MF->getMMI()), MDNStartSlot(0),
- MDNEndSlot(0) {
+ TheFunction(MF->getFunction()), TheMMI(MF->getMMI()) {
setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M,
bool ShouldInitializeAllMetadata) {
this->processMachineModule(AST, M, ShouldInitializeAllMetadata);
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 19bf87d3e290..1a4ad53ddf81 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -43,8 +43,7 @@ void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
: MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness),
- IsUpdatedCSRsInitialized(false) {
+ EnableSubRegLiveness) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 005d4ad1a328..c9d3e473062b 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1909,7 +1909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const Register Reg = MO->getReg();
if (!Reg)
return;
- if (MRI->tracksLiveness() && !MI->isDebugValue())
+ if (MRI->tracksLiveness() && !MI->isDebugInstr())
checkLiveness(MO, MONum);
// Verify the consistency of tied operands.
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index d7cd0a583cee..aac46cb22084 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -139,7 +139,7 @@ namespace {
///
/// This is the instruction number from the top of the current block, not
/// the SlotIndex. It is only used by the AntiDepBreaker.
- unsigned EndIndex;
+ unsigned EndIndex = 0;
public:
SchedulePostRATDList(
@@ -206,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList(
const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
const InstrItineraryData *InstrItins =
MF.getSubtarget().getInstrItineraryData();
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 87df7bb4a689..fc5d1104a999 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
- "regalloc-enable-advisor", cl::Hidden,
+ "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore,
cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
cl::desc("Enable regalloc advisor mode"),
cl::values(
@@ -66,7 +66,7 @@ public:
private:
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
}
bool doInitialization(Module &M) override {
@@ -113,7 +113,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
llvm_unreachable("Unknown advisor kind");
}
-RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
const RAGreedy &RA)
: MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
@@ -122,3 +122,178 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
EnableLocalReassign(EnableLocalReassignment ||
MF.getSubtarget().enableRALocalReassignment(
MF.getTarget().getOptLevel())) {}
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B,
+ bool BreaksHint) const {
+ bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ if (A.weight() > B.weight()) {
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
+ return true;
+ }
+ return false;
+}
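
The policy above is small enough to restate as a pure function; the sketch below mirrors its two rules (hint-chasing while the evictee can still be split, otherwise a weight comparison), with plain floats standing in for LiveInterval:

    #include <iostream>

    static bool shouldEvictSketch(float WeightA, float WeightB, bool IsHint,
                                  bool BreaksHint, bool EvicteeCanSplit) {
      if (EvicteeCanSplit && IsHint && !BreaksHint)
        return true;            // hint-driven eviction, evictee can still split
      return WeightA > WeightB; // otherwise a straight weight comparison
    }

    int main() {
      // A lighter range still evicts a heavier one while chasing its hint:
      std::cout << shouldEvictSketch(1.0f, 2.0f, /*IsHint=*/true,
                                     /*BreaksHint=*/false,
                                     /*EvicteeCanSplit=*/true)
                << '\n'; // prints 1
    }
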
+
+/// canEvictHintInterference - Return true if the interference for VirtReg
+/// on PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool DefaultEvictionAdvisor::canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
+ LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
+
+ EvictionCost Cost;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    // If there are 10 or more interferences, chances are one is heavier.
+ const auto &Interferences = Q.interferingVRegs(10);
+ if (Interferences.size() >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (LiveInterval *Intf : reverse(Interferences)) {
+ assert(Register::isVirtualRegister(Intf->reg()) &&
+ "Only expecting virtual register interference from query");
+
+ // Do not allow eviction of a virtual register if we are in the middle
+ // of last-chance recoloring and this virtual register is one that we
+ // have scavenged a physical register for.
+ if (FixedRegisters.count(Intf->reg()))
+ return false;
+
+      // Never evict spill products. They cannot be split or spilled.
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ if (Urgent)
+ continue;
+ // Apply the eviction policy for non-urgent evictions.
+ if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ // If !MaxCost.isMax(), then we're just looking for a cheap register.
+ // Evicting another local live range in this case could lead to suboptimal
+ // coloring.
+ if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
+ return false;
+ }
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
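
The loop's `!(Cost < MaxCost)` test relies on a lexicographic ordering in which a broken hint outweighs any spill-weight difference. A standalone sketch of that ordering, assuming the two-field (BrokenHints, MaxWeight) layout that RAGreedy's EvictionCost uses:

    #include <iostream>
    #include <tuple>

    struct EvictionCostSketch {
      unsigned BrokenHints = 0;
      float MaxWeight = 0.0f;

      bool operator<(const EvictionCostSketch &O) const {
        return std::tie(BrokenHints, MaxWeight) <
               std::tie(O.BrokenHints, O.MaxWeight);
      }
    };

    int main() {
      EvictionCostSketch Heavy{0, 5.0f};  // heavy interference, no hints broken
      EvictionCostSketch Hinted{1, 0.5f}; // light, but breaks a hint
      std::cout << (Heavy < Hinted) << '\n'; // prints 1: hints dominate weight
    }
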
+
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost;
+ BestCost.setMax();
+ MCRegister BestPhys;
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < uint8_t(~0u)) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight();
+ }
+
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I) {
+ MCRegister PhysReg = *I;
+ assert(PhysReg);
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
+ !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (I.isHint())
+ break;
+ }
+ return BestPhys;
+}
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 33e03aed81a7..1f40386db8da 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -115,7 +115,7 @@ public:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
protected:
- RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
+ RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
@@ -173,7 +173,7 @@ public:
/// Get an advisor for the given context (i.e. machine function, etc)
virtual std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
protected:
@@ -200,7 +200,7 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
// out of RegAllocGreedy.cpp
class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
public:
- DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
+ DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA)
: RegAllocEvictionAdvisor(MF, RA) {}
private:
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6ea6dbcbbb74..7870574df5b2 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -440,143 +440,6 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
return PhysReg;
}
-/// shouldEvict - determine if A should evict the assigned live range B. The
-/// eviction policy defined by this function together with the allocation order
-/// defined by enqueue() decides which registers ultimately end up being split
-/// and spilled.
-///
-/// Cascade numbers are used to prevent infinite loops if this function is a
-/// cyclic relation.
-///
-/// @param A The live range to be assigned.
-/// @param IsHint True when A is about to be assigned to its preferred
-/// register.
-/// @param B The live range to be evicted.
-/// @param BreaksHint True when B is already assigned to its preferred register.
-bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B,
- bool BreaksHint) const {
- bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
-
- // Be fairly aggressive about following hints as long as the evictee can be
- // split.
- if (CanSplit && IsHint && !BreaksHint)
- return true;
-
- if (A.weight() > B.weight()) {
- LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
- return true;
- }
- return false;
-}
-
-/// canEvictHintInterference - return true if the interference for VirtReg
-/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
-bool DefaultEvictionAdvisor::canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
- const SmallVirtRegSet &FixedRegisters) const {
- EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
- return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
- FixedRegisters);
-}
-
-/// canEvictInterferenceBasedOnCost - Return true if all interferences between
-/// VirtReg and PhysReg can be evicted.
-///
-/// @param VirtReg Live range that is about to be assigned.
-/// @param PhysReg Desired register for assignment.
-/// @param IsHint True when PhysReg is VirtReg's preferred register.
-/// @param MaxCost Only look for cheaper candidates and update with new cost
-/// when returning true.
-/// @returns True when interference can be evicted cheaper than MaxCost.
-bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
- EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
- // It is only possible to evict virtual register interference.
- if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
- return false;
-
- bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
-
- // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
- // involved in an eviction before. If a cascade number was assigned, deny
- // evicting anything with the same or a newer cascade number. This prevents
- // infinite eviction loops.
- //
- // This works out so a register without a cascade number is allowed to evict
- // anything, and it can be evicted by anything.
- unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
-
- EvictionCost Cost;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- // If there is 10 or more interferences, chances are one is heavier.
- const auto &Interferences = Q.interferingVRegs(10);
- if (Interferences.size() >= 10)
- return false;
-
- // Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Interferences)) {
- assert(Register::isVirtualRegister(Intf->reg()) &&
- "Only expecting virtual register interference from query");
-
- // Do not allow eviction of a virtual register if we are in the middle
- // of last-chance recoloring and this virtual register is one that we
- // have scavenged a physical register for.
- if (FixedRegisters.count(Intf->reg()))
- return false;
-
- // Never evict spill products. They cannot split or spill.
- if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
- return false;
- // Once a live range becomes small enough, it is urgent that we find a
- // register for it. This is indicated by an infinite spill weight. These
- // urgent live ranges get to evict almost anything.
- //
- // Also allow urgent evictions of unspillable ranges from a strictly
- // larger allocation order.
- bool Urgent =
- !VirtReg.isSpillable() &&
- (Intf->isSpillable() ||
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
- RegClassInfo.getNumAllocatableRegs(
- MRI->getRegClass(Intf->reg())));
- // Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
- if (Cascade <= IntfCascade) {
- if (!Urgent)
- return false;
- // We permit breaking cascades for urgent evictions. It should be the
- // last resort, though, so make it really expensive.
- Cost.BrokenHints += 10;
- }
- // Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
- // Update eviction cost.
- Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
- // Abort if this would be too expensive.
- if (!(Cost < MaxCost))
- return false;
- if (Urgent)
- continue;
- // Apply the eviction policy for non-urgent evictions.
- if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
- return false;
- // If !MaxCost.isMax(), then we're just looking for a cheap register.
- // Evicting another local live range in this case could lead to suboptimal
- // coloring.
- if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
- (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
- return false;
- }
- }
- }
- MaxCost = Cost;
- return true;
-}
-
/// Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
@@ -757,44 +620,6 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
return true;
}
-MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
- // Keep track of the cheapest interference seen so far.
- EvictionCost BestCost;
- BestCost.setMax();
- MCRegister BestPhys;
- auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
- if (!MaybeOrderLimit)
- return MCRegister::NoRegister;
- unsigned OrderLimit = *MaybeOrderLimit;
-
- // When we are just looking for a reduced cost per use, don't break any
- // hints, and only evict smaller spill weights.
- if (CostPerUseLimit < uint8_t(~0u)) {
- BestCost.BrokenHints = 0;
- BestCost.MaxWeight = VirtReg.weight();
- }
-
- for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
- ++I) {
- MCRegister PhysReg = *I;
- assert(PhysReg);
- if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
- !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
- FixedRegisters))
- continue;
-
- // Best so far.
- BestPhys = PhysReg;
-
- // Stop if the hint can be used.
- if (I.isHint())
- break;
- }
- return BestPhys;
-}
-
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
@@ -2922,6 +2747,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
RegCosts = TRI->getRegisterCosts(*MF);
+ ExtraInfo.emplace();
+ EvictAdvisor =
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
@@ -2931,9 +2760,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
- ExtraInfo.emplace();
- EvictAdvisor =
- getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 932f263d2558..041d7e5b4a4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -143,7 +143,7 @@ namespace {
SelectionDAG &DAG;
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
- CombineLevel Level;
+ CombineLevel Level = BeforeLegalizeTypes;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
@@ -238,8 +238,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
- STI(D.getSubtarget().getSelectionDAGInfo()),
- Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
+ STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
@@ -441,6 +440,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitFunnelShift(SDNode *N);
+ SDValue visitSHLSAT(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
@@ -907,9 +907,8 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
return true;
}
- if (N.getOpcode() != ISD::SELECT_CC ||
- !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
- !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
+ !TLI.isConstFalseVal(N.getOperand(3)))
return false;
if (TLI.getBooleanContents(N.getValueType()) ==
@@ -1654,6 +1653,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ROTL: return visitRotate(N);
case ISD::FSHL:
case ISD::FSHR: return visitFunnelShift(N);
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: return visitSHLSAT(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -5530,8 +5531,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Some constants may need fixing up later if they are too large.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
- if (Mask->getValueType(0) != C->getValueType(0))
- return false;
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
NodesWithConsts.insert(N);
@@ -5565,9 +5564,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT VT = Op.getOpcode() == ISD::AssertZext
- ? cast<VTSDNode>(Op.getOperand(1))->getVT()
- : Op.getOperand(0).getValueType();
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
// We can accept extending nodes if the mask is wider or an equal
// width to the original type.
@@ -5575,15 +5574,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
continue;
break;
}
- case ISD::ANY_EXTEND: {
- unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT VT = Op.getOperand(0).getValueType();
- if (ExtVT.bitsGE(VT))
- break;
- // Fallthrough to searching for nodes from the operands of the extend.
- LLVM_FALLTHROUGH;
- }
case ISD::OR:
case ISD::XOR:
case ISD::AND:
@@ -5643,14 +5633,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// masking.
if (FixupNode) {
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
- SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
- FixupNode->getValueType(0));
- SDValue And =
- DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
- SDValue(FixupNode, 0), MaskOpT);
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
if (And.getOpcode() == ISD ::AND)
- DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
@@ -5659,12 +5647,10 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
- std::swap(Op0, Op1);
+ std::swap(Op0, Op1);
- SDValue MaskOpT =
- DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
- SDValue And =
- DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
@@ -5672,14 +5658,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// Create narrow loads.
for (auto *Load : Loads) {
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
- SDValue MaskOpT =
- DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
- SDValue(Load, 0), MaskOpT);
+ SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
if (And.getOpcode() == ISD ::AND)
And = SDValue(
- DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = reduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
@@ -8036,8 +8020,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
- if (TLI.isConstTrueVal(N1.getNode()) &&
- isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
+ if (TLI.isConstTrueVal(N1) &&
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -9348,6 +9332,22 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+
+ // fold (*shlsat c1, c2) -> c1<<c2
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ return SDValue();
+}
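
The new fold bottoms out in APInt's saturating shifts (dispatched to by the FoldValue cases added in SelectionDAG.cpp below); a quick demonstration of those semantics:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      APInt C1(8, 100), C2(8, 2);
      // 100 << 2 = 400 does not fit in 8 bits, so the result clamps:
      outs() << C1.sshl_sat(C2).getSExtValue() << '\n'; // 127 (signed max)
      outs() << C1.ushl_sat(C2).getZExtValue() << '\n'; // 255 (unsigned max)
      // In-range shifts behave like plain shl:
      outs() << APInt(8, 3).sshl_sat(C2).getSExtValue() << '\n'; // 12
    }
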
+
// Given a ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates UABD/SABD instruction.
@@ -14580,7 +14580,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
unsigned NumElts = 1;
EVT VT = N->getValueType(0);
if (VT.isVector() && DAG.isSplatValue(N1))
- NumElts = VT.getVectorNumElements();
+ NumElts = VT.getVectorMinNumElements();
if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index bfde35935c7b..d8ef79fe9a7b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1838,8 +1838,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
- SkipTargetIndependentISel(SkipTargetIndependentISel),
- LastLocalValue(nullptr), EmitStartPt(nullptr) {}
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
FastISel::~FastISel() = default;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 403f34573899..55f6f288f3e3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -47,8 +47,7 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
- InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+ : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -577,7 +576,7 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
const ScheduleDAGSDNodes *SD)
- : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ : SchedDAG(SD), Node(SU->getNode()) {
InitNodeNumDefs();
Advance();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 8c28ce403c9b..99bbaeb19182 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -45,8 +45,8 @@ class InstrItineraryData;
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
- MachineBasicBlock *BB;
- SelectionDAG *DAG; // DAG of the current basic block
+ MachineBasicBlock *BB = nullptr;
+ SelectionDAG *DAG = nullptr; // DAG of the current basic block
const InstrItineraryData *InstrItins;
/// The schedule. Null SUnit*'s represent noop instructions.
@@ -138,8 +138,8 @@ class InstrItineraryData;
class RegDefIter {
const ScheduleDAGSDNodes *SchedDAG;
const SDNode *Node;
- unsigned DefIdx;
- unsigned NodeNumDefs;
+ unsigned DefIdx = 0;
+ unsigned NodeNumDefs = 0;
MVT ValueType;
public:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45f3005e8f57..d5998d166d25 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2449,7 +2449,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
switch (V.getOpcode()) {
default:
return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this, 0);
+ *this);
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -3082,6 +3082,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ // TODO: SelfMultiply can be poison, but not undef.
+ SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), DemandedElts, false, Depth + 1);
Known = KnownBits::mul(Known, Known2, SelfMultiply);
break;
}
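
Why the extra guard: `Op.getOperand(0) == Op.getOperand(1)` does not guarantee both uses observe the same value when the operand is undef, since each use of undef may independently take a different value, so the self-multiply facts in KnownBits::mul are only sound for a genuinely repeated value. A sketch of the fact being protected (assuming the three-argument KnownBits::mul overload invoked above):

    #include "llvm/Support/KnownBits.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      KnownBits X(8); // nothing known about x
      // With SelfMultiply set, bit 1 of the product should be known zero
      // even though nothing is known about x itself.
      KnownBits Sq = KnownBits::mul(X, X, /*SelfMultiply=*/true);
      outs() << "bit 1 known zero: " << Sq.Zero[1] << '\n';
    }
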
@@ -5240,6 +5243,8 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::UADDSAT: return C1.uadd_sat(C2);
case ISD::SSUBSAT: return C1.ssub_sat(C2);
case ISD::USUBSAT: return C1.usub_sat(C2);
+ case ISD::SSHLSAT: return C1.sshl_sat(C2);
+ case ISD::USHLSAT: return C1.ushl_sat(C2);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 41460f78e1c2..01230a36e744 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4014,7 +4014,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
- uint64_t TySize = DL.getTypeAllocSize(Ty);
+ TypeSize TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
@@ -4023,9 +4023,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
- AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
- AllocSize,
- DAG.getConstant(TySize, dl, IntPtr));
+ if (TySize.isScalable())
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getVScale(dl, IntPtr,
+ APInt(IntPtr.getScalarSizeInBits(),
+ TySize.getKnownMinValue())));
+ else
+ AllocSize =
+ DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
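
For a scalable type, DL.getTypeAllocSize reports only the known-minimum byte count; the true allocation size is that minimum scaled by the runtime vscale, hence the ISD::VSCALE multiplier in the hunk above. A small sketch of the TypeSize distinction (the element types in the comments are assumptions for illustration):

    #include "llvm/Support/TypeSize.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      TypeSize Fixed = TypeSize::getFixed(16);       // e.g. <4 x i32>
      TypeSize Scalable = TypeSize::getScalable(16); // e.g. <vscale x 4 x i32>
      outs() << Fixed.getFixedValue() << " bytes\n";                // 16
      outs() << Scalable.getKnownMinValue() << " x vscale bytes\n"; // 16 * vscale
    }
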
@@ -6870,6 +6876,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
+ case Intrinsic::instrprof_cover:
+ llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 77e11b364588..3c786904620a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -319,7 +319,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
CurDAG(new SelectionDAG(tm, OL)),
SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
OL)),
- AA(), GFI(), OptLevel(OL), DAGSize(0) {
+ OptLevel(OL) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index e2db9633bfb9..dfda7d8b9f81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -990,6 +990,24 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
return ReturnVal;
}
+/// Return two gc.results if present. The first result is a block-local
+/// gc.result; the second is a non-block-local gc.result. The corresponding
+/// entry is nullptr if not present.
+static std::pair<const GCResultInst*, const GCResultInst*>
+getGCResultLocality(const GCStatepointInst &S) {
+ std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr);
+ for (auto *U : S.users()) {
+ auto *GRI = dyn_cast<GCResultInst>(U);
+ if (!GRI)
+ continue;
+ if (GRI->getParent() == S.getParent())
+ Res.first = GRI;
+ else
+ Res.second = GRI;
+ }
+ return Res;
+}
+
void
SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB /*= nullptr*/) {
@@ -1075,12 +1093,11 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const std::pair<bool, bool> GCResultLocality = I.getGCResultLocality();
- Type *RetTy = I.getActualReturnType();
+ const auto GCResultLocality = getGCResultLocality(I);
- if (RetTy->isVoidTy() ||
- (!GCResultLocality.first && !GCResultLocality.second)) {
- // The return value is not needed, just generate a poison value.
+ if (!GCResultLocality.first && !GCResultLocality.second) {
+    // The return value is not needed; just generate a poison value.
+ // Note: This covers the void return case.
setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
return;
}
@@ -1102,6 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
// manually.
// TODO: To eliminate this problem we can remove gc.result intrinsics
// completely and make statepoint call to return a tuple.
+ Type *RetTy = GCResultLocality.second->getType();
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
@@ -1168,7 +1186,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// register because statepoint and actual call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
// which is always i32 in our case.
- Type *RetTy = SI->getActualReturnType();
+ Type *RetTy = CI.getType();
SDValue CopyFromReg = getCopyFromRegs(SI, RetTy);
assert(CopyFromReg.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a98c21f16c71..f6d1fa87676f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -63,7 +63,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull})
+ Attribute::NonNull, Attribute::NoUndef})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
@@ -606,6 +606,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified =
+ SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return Simplified;
+}
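
A compile-only sketch of how a target combine would call the new overload; `DCI` and the demanded masks come from the combiner, and the overload itself takes care of adding Op to the worklist and committing the simplification:

    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    // Call shape inside a target's PerformDAGCombine (no main on purpose).
    static bool trySimplify(const TargetLowering &TLI, SDValue Op,
                            const APInt &DemandedBits, const APInt &DemandedElts,
                            TargetLowering::DAGCombinerInfo &DCI) {
      // Returns true once the simplification has been committed through DCI.
      return TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, DCI);
    }
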
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
@@ -2247,8 +2264,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
- case ISD::ADD:
case ISD::MUL:
+ // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
+ if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
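
The fold is sound because bit 1 of a square is always clear: writing x = 2k + b with b the low bit, x·x = 4(k² + kb) + b², so x·x mod 4 is either 0 or 1. When bit 1 is the only demanded bit, the whole multiply therefore folds to 0 (the 'Quadratic Reciprocity' name is tongue-in-cheek). An exhaustive 8-bit check:

    #include <cstdint>
    #include <iostream>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        if ((X * X) & 2u) { // test bit 1 of the square
          std::cout << "counterexample: " << X << '\n';
          return 1;
        }
      std::cout << "bit 1 of x*x is clear for every 8-bit x\n";
      return 0;
    }
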
@@ -3173,29 +3194,25 @@ bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
-bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+bool TargetLowering::isConstTrueVal(SDValue N) const {
if (!N)
return false;
+ unsigned EltWidth;
APInt CVal;
- if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
CVal = CN->getAPIntValue();
- } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
- auto *CN = BV->getConstantSplatNode();
- if (!CN)
- return false;
-
- // If this is a truncating build vector, truncate the splat value.
- // Otherwise, we may fail to match the expected values below.
- unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
- CVal = CN->getAPIntValue();
- if (BVEltWidth < CVal.getBitWidth())
- CVal = CVal.trunc(BVEltWidth);
- } else {
+ EltWidth = N.getValueType().getScalarSizeInBits();
+ } else
return false;
- }
- switch (getBooleanContents(N->getValueType(0))) {
+ // If this is a truncating splat, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ if (EltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(EltWidth);
+
+ switch (getBooleanContents(N.getValueType())) {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
@@ -3207,7 +3224,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
-bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+bool TargetLowering::isConstFalseVal(SDValue N) const {
if (!N)
return false;
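
Which constant counts as "true" or "false" depends on the target's boolean contents; the three conventions the switch above dispatches over can be illustrated directly with APInt (a sketch of the predicates, not of TargetLowering itself):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      APInt One(8, 1);
      APInt AllOnes = APInt::getAllOnes(8);
      outs() << One.isOne() << '\n';        // true under ZeroOrOneBooleanContent
      outs() << AllOnes.isAllOnes() << '\n'; // true under ZeroOrNegativeOne...
      outs() << AllOnes[0] << '\n';          // UndefinedBooleanContent: bit 0 only
    }
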
@@ -3742,7 +3759,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
TopSetCC.getOpcode() == ISD::SETCC &&
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
- (isConstFalseVal(N1C) ||
+ (isConstFalseVal(N1) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index c933031ef37d..ffac68a223bf 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
char SlotIndexes::ID = 0;
-SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
+SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 1ab6ead3b5f6..99e12fce6513 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,7 +27,8 @@
namespace llvm {
-bool DwarfStreamer::init(Triple TheTriple) {
+bool DwarfStreamer::init(Triple TheTriple,
+ StringRef Swift5ReflectionSegmentName) {
std::string ErrorStr;
std::string TripleName;
StringRef Context = "dwarf streamer init";
@@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) {
if (!MSTI)
return error("no subtarget info for target " + TripleName, Context), false;
- MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
- MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
+ MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+ nullptr, true, Swift5ReflectionSegmentName));
+ MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
MC->setObjectFileInfo(MOFI.get());
MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
MS->emitBytes(Buffer);
}
+void DwarfStreamer::emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) {
+ MCSection *ReflectionSection =
+ MOFI->getSwift5ReflectionSection(ReflSectionKind);
+ if (ReflectionSection == nullptr)
+ return;
+ ReflectionSection->setAlignment(Align(Alignment));
+ MS->SwitchSection(ReflectionSection);
+ MS->emitBytes(Buffer);
+}
+
/// Emit the debug_range section contents for \p FuncRange by
/// translating the original \p Entries. The debug_range section
/// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3b8d80c4eeec..99001269e1f8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2866,6 +2866,90 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
return Builder.CreateCall(Fn, Args, Name);
}
+CallInst *OpenMPIRBuilder::createOMPInteropInit(
+ const LocationDescription &Loc, Value *InteropVar,
+ omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
+ Value *DependenceAddress, bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, InteropTypeVal,
+ Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
+
+ return Builder.CreateCall(Fn, Args);
+}
+
+CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
+ const LocationDescription &Loc, Value *InteropVar, Value *Device,
+ Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, Device,
+ NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
+
+ return Builder.CreateCall(Fn, Args);
+}
+
+CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, Device,
+ NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
+
+ return Builder.CreateCall(Fn, Args);
+}
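
A compile-only sketch of driving the three new entry points for one interop object; the builder, insertion location, interop variable, and interop type all come from the caller, and the nullptr arguments deliberately select the defaults the builders install (device -1, zero dependences):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static void emitInteropLifetime(OpenMPIRBuilder &OMPB,
                                    const OpenMPIRBuilder::LocationDescription &Loc,
                                    Value *InteropVar, omp::OMPInteropType Ty) {
      OMPB.createOMPInteropInit(Loc, InteropVar, Ty, /*Device=*/nullptr,
                                /*NumDependences=*/nullptr,
                                /*DependenceAddress=*/nullptr,
                                /*HaveNowaitClause=*/false);
      OMPB.createOMPInteropUse(Loc, InteropVar, nullptr, nullptr, nullptr, false);
      OMPB.createOMPInteropDestroy(Loc, InteropVar, nullptr, nullptr, nullptr,
                                   false);
    }
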
+
CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
const LocationDescription &Loc, llvm::Value *Pointer,
llvm::ConstantInt *Size, const llvm::Twine &Name) {
@@ -3138,7 +3222,7 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic read expected a scalar type");
@@ -3180,7 +3264,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic write expected a scalar type");
@@ -3216,7 +3300,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic update expected a scalar type");
@@ -3225,8 +3309,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
"OpenMP atomic does not support LT or GT operations");
});
- emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
- IsXBinopExpr);
+ emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
+ X.IsVolatile, IsXBinopExpr);
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
return Builder.saveIP();
}
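
The recurring `XTy->getPointerElementType()` → `X.ElemTy` change is opaque-pointer preparation: the element type can no longer be recovered from the pointer operand's type, so callers must carry it alongside the pointer. A compile-only sketch of populating the operand (assuming AtomicOpValue exposes Var and ElemTy as the hunks above read them):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static OpenMPIRBuilder::AtomicOpValue makeAtomicOperand(Value *Ptr,
                                                            Type *ElemTy) {
      OpenMPIRBuilder::AtomicOpValue X;
      X.Var = Ptr;      // pointer to the target memory
      X.ElemTy = ElemTy; // element type, supplied explicitly by the caller
      return X;
    }
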
@@ -3259,13 +3343,10 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
llvm_unreachable("Unsupported atomic update operation");
}
-std::pair<Value *, Value *>
-OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
- AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp,
- bool VolatileX, bool IsXBinopExpr) {
- Type *XElemTy = X->getType()->getPointerElementType();
-
+std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
+ Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+ AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
bool DoCmpExch =
((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
(RMWOp == AtomicRMWInst::FSub) ||
@@ -3380,8 +3461,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
// If UpdateExpr is 'x' updated with some `expr` not based on 'x',
// 'x' is simply atomically rewritten with 'expr'.
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
- std::pair<Value *, Value *> Result = emitAtomicUpdate(
- AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr);
+ std::pair<Value *, Value *> Result =
+ emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
+ X.IsVolatile, IsXBinopExpr);
Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index c92bacaee36d..43fde64c3734 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -31,11 +30,9 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <climits>
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -390,26 +387,15 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
// align=4
// alignstack=8
//
- if (hasAttribute(Attribute::Alignment)) {
- std::string Result;
- Result += "align";
- Result += (InAttrGrp) ? "=" : " ";
- Result += utostr(getValueAsInt());
- return Result;
- }
+ if (hasAttribute(Attribute::Alignment))
+ return (InAttrGrp ? "align=" + Twine(getValueAsInt())
+ : "align " + Twine(getValueAsInt()))
+ .str();
auto AttrWithBytesToString = [&](const char *Name) {
- std::string Result;
- Result += Name;
- if (InAttrGrp) {
- Result += "=";
- Result += utostr(getValueAsInt());
- } else {
- Result += "(";
- Result += utostr(getValueAsInt());
- Result += ")";
- }
- return Result;
+ return (InAttrGrp ? Name + ("=" + Twine(getValueAsInt()))
+ : Name + ("(" + Twine(getValueAsInt())) + ")")
+ .str();
};
if (hasAttribute(Attribute::StackAlignment))
@@ -426,26 +412,18 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
Optional<unsigned> NumElems;
std::tie(ElemSize, NumElems) = getAllocSizeArgs();
- std::string Result = "allocsize(";
- Result += utostr(ElemSize);
- if (NumElems.hasValue()) {
- Result += ',';
- Result += utostr(*NumElems);
- }
- Result += ')';
- return Result;
+ return (NumElems
+ ? "allocsize(" + Twine(ElemSize) + "," + Twine(*NumElems) + ")"
+ : "allocsize(" + Twine(ElemSize) + ")")
+ .str();
}
if (hasAttribute(Attribute::VScaleRange)) {
unsigned MinValue = getVScaleRangeMin();
Optional<unsigned> MaxValue = getVScaleRangeMax();
-
- std::string Result = "vscale_range(";
- Result += utostr(MinValue);
- Result += ',';
- Result += utostr(MaxValue.getValueOr(0));
- Result += ')';
- return Result;
+ return ("vscale_range(" + Twine(MinValue) + "," +
+ Twine(MaxValue.getValueOr(0)) + ")")
+ .str();
}
// Convert target-dependent attributes to strings of the form:
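
The rewrite replaces repeated std::string appends with a single Twine expression. One subtlety worth noting: `const char * + const char *` does not concatenate, so the inner parentheses in AttrWithBytesToString first build a Twine, after which the raw Name pointer can be prepended. A small demonstration (attribute name and value are illustrative):

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      const char *Name = "dereferenceable"; // illustrative attribute name
      unsigned Bytes = 8;
      // The inner group ("(" + Twine(Bytes)) is a Twine, so the outer +
      // operators resolve against Twine rather than raw pointers.
      std::string S = (Name + ("(" + Twine(Bytes)) + ")").str();
      outs() << S << '\n'; // dereferenceable(8)
    }
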
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 45459e200b3d..11839c7572e3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -15,7 +15,6 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7beafc485d09..99e3afaa8ba8 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -12,15 +12,14 @@
#include "llvm/IR/BasicBlock.h"
#include "SymbolTableListTraitsImpl.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/IR/Comdat.cpp b/llvm/lib/IR/Comdat.cpp
index 90d5c6e82e5c..2cd6db913621 100644
--- a/llvm/lib/IR/Comdat.cpp
+++ b/llvm/lib/IR/Comdat.cpp
@@ -11,11 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Comdat.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMapEntry.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 622a984be22c..936b1fc2ff6f 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -30,8 +30,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
using namespace llvm::PatternMatch;
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index c13990af360e..b862a159127f 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -16,16 +16,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 43df15e4d932..7ed156d552b1 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -13,6 +13,7 @@
#include "llvm-c/Core.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
@@ -27,6 +28,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index a6e84dfbe1dd..dc5768dd4f26 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -13,12 +13,10 @@
#include "llvm/IR/DIBuilder.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 61b2b13bfd03..96f55cf14de8 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -30,12 +30,13 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
-#include <tuple>
+#include <new>
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 98f25b035157..fd4b4170c0a7 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -14,17 +14,16 @@
#include "llvm-c/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 59afb844eb89..b9fc5261fefe 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -15,9 +15,9 @@
#include "MetadataImpl.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include <numeric>
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index 993f3a39e6ff..34c9d026b19a 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DebugLoc.h"
-#include "LLVMContextImpl.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfo.h"
using namespace llvm;
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 0a872a81f911..f46f0fdd947d 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DiagnosticInfo.h"
-#include "LLVMContextImpl.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
@@ -24,22 +23,19 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Regex.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <atomic>
-#include <cassert>
-#include <memory>
#include <string>
using namespace llvm;
diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp
index ace708b252c7..aac8936c7bd6 100644
--- a/llvm/lib/IR/Dominators.cpp
+++ b/llvm/lib/IR/Dominators.cpp
@@ -14,19 +14,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
+
+#include <cassert>
+
+namespace llvm {
+class Argument;
+class Constant;
+class Value;
+} // namespace llvm
using namespace llvm;
bool llvm::VerifyDomInfo = false;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 1e874d7afa79..726ba80da41b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -30,7 +30,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
@@ -63,7 +62,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index c832499dde06..47e8bc0a916d 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 27528a69be21..4e8f1b506811 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -29,7 +29,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <vector>
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index a0c48781ced5..203ad6dae1ff 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -22,7 +22,6 @@
#include <algorithm>
#include <cassert>
#include <cctype>
-#include <cstddef>
#include <cstdlib>
using namespace llvm;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 59b7221d1fa2..36a20679863b 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
using namespace llvm;
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index adea7abb75cf..e27758c5de02 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -24,14 +24,12 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 90716d9c81a6..e19ead98a616 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -20,8 +20,6 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index ebbf382aea38..8f9530290459 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -11,12 +11,24 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
+#include "AttributeImpl.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMapEntry.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/DiagnosticHandler.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <utility>
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 0b5f928165e8..70242f4d8f20 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_IR_LLVMCONTEXTIMPL_H
#define LLVM_LIB_IR_LLVMCONTEXTIMPL_H
-#include "AttributeImpl.h"
#include "ConstantsContext.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -34,13 +33,14 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -52,9 +52,23 @@
namespace llvm {
+class AttributeImpl;
+class AttributeListImpl;
+class AttributeSetNode;
+class BasicBlock;
+struct DiagnosticHandler;
+class ElementCount;
+class Function;
+class GlobalObject;
+class GlobalValue;
+class InlineAsm;
+class LLVMRemarkStreamer;
+class OptPassGate;
+namespace remarks {
+class RemarkStreamer;
+}
+template <typename T> class StringMapEntry;
class StringRef;
-class Type;
-class Value;
class ValueHandleBase;
using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
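
Annotation: a large share of this commit is the same include-hygiene move seen in this hunk — a header keeps only forward declarations for types it mentions by pointer or reference, and the heavy #include sinks into the few .cpp files that need the complete type. A minimal self-contained sketch of when a forward declaration suffices (hypothetical Widget, not from this patch):

// Forward-declaration sketch: a pointer, reference, or function signature
// only needs the *name* of the type, so no #include is required here.
class Widget; // replaces #include "Widget.h" in this header

struct Registry {
  Widget *Active = nullptr; // OK: pointer to an incomplete type
  void track(Widget &W);    // OK: reference parameter, declaration only
};
// Only the translation unit that actually dereferences a Widget needs the
// full definition, so Widget.h moves from this header into that one .cpp.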
diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp
index 21ce47457f52..f7e2aa4e9a35 100644
--- a/llvm/lib/IR/LLVMRemarkStreamer.cpp
+++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp
@@ -15,7 +15,9 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 4357c95aa9f6..08cf909a83f9 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -12,28 +12,27 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
-#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <unordered_set>
+
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/StructuralHash.h"
+#endif
+
using namespace llvm;
// See PassManagers.h for Pass Manager infrastructure overview.
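
Annotation: StructuralHash.h is now pulled in only under EXPENSIVE_CHECKS, since (as far as this diff shows) its only consumer in this file is expensive-checks-only verification. A hedged sketch of that pattern — the names FP and F and the exact check are assumptions, not the literal call site in LegacyPassManager.cpp:

#include <cassert>
#include <cstdint>
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/StructuralHash.h"
#endif

// Sketch: fingerprint the IR before a pass runs, and assert that a pass
// reporting "no change" really left the function untouched.
static bool runAndCheck(llvm::FunctionPass &FP, llvm::Function &F) {
#ifdef EXPENSIVE_CHECKS
  uint64_t RefHash = llvm::StructuralHash(F); // IR fingerprint before the pass
#endif
  bool Changed = FP.runOnFunction(F);
#ifdef EXPENSIVE_CHECKS
  assert((Changed || llvm::StructuralHash(F) == RefHash) &&
         "pass modified IR without reporting a change");
#endif
  return Changed;
}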
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index ebcc493407cc..226718ecac28 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
-#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -44,7 +43,6 @@
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -52,8 +50,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <iterator>
-#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index a0485a59d0e0..4974b372db2a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Module.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -39,7 +38,6 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index a0ac7d3ad7d3..0ca40a675fe4 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -14,7 +14,6 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 08c1fc931e2e..c2a4a7c29915 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -14,7 +14,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "ConstantsContext.h"
diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp
index 55c0dbad5aab..418311eac814 100644
--- a/llvm/lib/IR/OptBisect.cpp
+++ b/llvm/lib/IR/OptBisect.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <limits>
using namespace llvm;
diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp
index d933003ccdf7..3025c3853d5f 100644
--- a/llvm/lib/IR/PassManager.cpp
+++ b/llvm/lib/IR/PassManager.cpp
@@ -7,10 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/PassManager.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/PassManagerImpl.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp
index 05d5ac2c5ddf..9f7335ecbe44 100644
--- a/llvm/lib/IR/ProfileSummary.cpp
+++ b/llvm/lib/IR/ProfileSummary.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/ProfileSummary.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 101cada77ff9..5cad887b295d 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -15,7 +15,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
-#include <unordered_set>
+#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index cfd8deba5a53..d2f676192e7f 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/ReplaceConstant.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueMap.h"
namespace llvm {
diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp
index a96e39f32882..5b865692dd7f 100644
--- a/llvm/lib/IR/SSAContext.cpp
+++ b/llvm/lib/IR/SSAContext.cpp
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/SSAContext.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index 2117527a64f0..d8634e0ac7dd 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -38,10 +38,8 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp
index b5916e4937c6..508e3cb71ed2 100644
--- a/llvm/lib/IR/Statepoint.cpp
+++ b/llvm/lib/IR/Statepoint.cpp
@@ -13,8 +13,6 @@
#include "llvm/IR/Statepoint.h"
-#include "llvm/IR/Function.h"
-
using namespace llvm;
bool llvm::isStatepointDirectiveAttr(Attribute Attr) {
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index d59d87ad631b..85b658c8a52f 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -21,10 +21,8 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp
index 99049c0232aa..601a9df5279e 100644
--- a/llvm/lib/IR/Use.cpp
+++ b/llvm/lib/IR/Use.cpp
@@ -8,11 +8,13 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include <new>
namespace llvm {
+class User;
+template <typename> struct simplify_type;
+class Value;
+
void Use::swap(Use &RHS) {
if (Val == RHS.Val)
return;
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 8741ed917f9f..18aef37e2023 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Value.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DerivedUser.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -32,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b84edb789405..989d01e2e395 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -58,7 +58,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/ilist.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -70,7 +69,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -5811,15 +5809,11 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call,
"void return type",
Call);
- Assert((BU.Inputs.empty() ||
- (BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()))),
- "operand bundle \"clang.arc.attachedcall\" can take either no "
- "arguments or one function as an argument",
+ Assert(BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()),
+ "operand bundle \"clang.arc.attachedcall\" requires one function as "
+ "an argument",
Call);
- if (BU.Inputs.empty())
- return;
-
auto *Fn = cast<Function>(BU.Inputs.front());
Intrinsic::ID IID = Fn->getIntrinsicID();
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index f26ef4b21996..418aad26fdd6 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -1372,7 +1373,7 @@ public:
sys::fs::OpenFlags::OF_None);
if (EC)
return errorCodeToError(EC);
- WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+ writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
if (ShouldEmitImportsFiles) {
EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 7694c9848384..3877def53c3f 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
@@ -144,7 +145,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
// directly and exit.
if (EC)
reportOpenError(Path, EC.message());
- WriteIndexToFile(Index, OS);
+ writeIndexToFile(Index, OS);
Path = OutputFileName + "index.dot";
raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None);
@@ -359,7 +360,7 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
LLVM_DEBUG(
dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
"command line arguments are not available");
- llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
/*Cmdline*/ CmdArgs);
}
@@ -380,7 +381,7 @@ static void codegen(const Config &Conf, TargetMachine *TM,
return;
if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
- llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true,
/*EmbedCmdline*/ false,
/*CmdArgs*/ std::vector<uint8_t>());
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 9aea27f0fdba..37e85b6af6ba 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1052,7 +1052,7 @@ void ThinLTOCodeGenerator::run() {
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
- WriteIndexToFile(*Index, OS);
+ writeIndexToFile(*Index, OS);
}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 119237bb052e..61ec941f50b8 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -788,7 +788,7 @@ void MCAsmStreamer::emitSyntaxDirective() {
}
void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- OS << "\t.def\t ";
+ OS << "\t.def\t";
Symbol->print(OS, MAI);
OS << ';';
EmitEOL();
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 7f639e9c408f..eafcee1e0607 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
- bool DoAutoReset)
- : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
- DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
- Symbols(Allocator), UsedNames(Allocator),
+ bool DoAutoReset, StringRef Swift5ReflSegmentName)
+ : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
+ SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
+ MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
InlineAsmUsedLabelNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index d7f85f793c55..b7890e7f0937 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,6 +299,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
RemarksSection = Ctx->getMachOSection(
"__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
+ // The architecture of dsymutil makes it very difficult to copy the Swift
+ // reflection metadata sections into the __TEXT segment, so dsymutil creates
+ // these sections in the __DWARF segment instead.
+ if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \
+ Swift5ReflectionSections \
+ [llvm::binaryformat::Swift5ReflectionSectionKind::KIND] = \
+ Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(), \
+ MACHO, 0, SectionKind::getMetadata());
+#include "llvm/BinaryFormat/Swift.def"
+ }
+
TLSExtraDataSection = TLSTLVSection;
}
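
Annotation: the HANDLE_SWIFT_SECTION hunk above uses the X-macro idiom — Swift.def is a table of HANDLE_SWIFT_SECTION rows, and each include site defines the macro to stamp out different code from the same rows. A self-contained sketch of the idiom with a hypothetical inline table instead of a .def file:

#include <cstdio>

// One table, many expansions: each use site redefines the row macro.
#define COLOR_TABLE(X) X(Red, "red") X(Green, "green") X(Blue, "blue")

#define AS_ENUM(NAME, STR) NAME,
enum class Color { COLOR_TABLE(AS_ENUM) };
#undef AS_ENUM

#define AS_CASE(NAME, STR)                                                   \
  case Color::NAME:                                                          \
    return STR;
static const char *name(Color C) {
  switch (C) { COLOR_TABLE(AS_CASE) }
  return "unknown";
}
#undef AS_CASE

int main() { std::printf("%s\n", name(Color::Green)); } // prints "green"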
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 42e257516f4e..3d95b18f4672 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -4765,3 +4766,15 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
Path.str().c_str());
return ObjectPaths;
}
+
+llvm::binaryformat::Swift5ReflectionSectionKind
+MachOObjectFile::mapReflectionSectionNameToEnumValue(
+ StringRef SectionName) const {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \
+ .Case(MACHO, llvm::binaryformat::Swift5ReflectionSectionKind::KIND)
+ return StringSwitch<llvm::binaryformat::Swift5ReflectionSectionKind>(
+ SectionName)
+#include "llvm/BinaryFormat/Swift.def"
+ .Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown);
+#undef HANDLE_SWIFT_SECTION
+}
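
Annotation: the new mapper builds its .Case list out of the same Swift.def table; llvm::StringSwitch chains .Case(...) calls and yields the first match, falling through to .Default. A hand-written sketch of the same shape (the section names here are illustrative, not taken from Swift.def):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Kind { FieldMD, AssocTy, Unknown };

// First matching .Case wins; .Default covers everything else.
static Kind classify(llvm::StringRef Name) {
  return llvm::StringSwitch<Kind>(Name)
      .Case("__swift5_fieldmd", Kind::FieldMD) // illustrative names
      .Case("__swift5_assocty", Kind::AssocTy)
      .Default(Kind::Unknown);
}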
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ffe2599beaf8..d597148b98ab 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -579,6 +579,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 80a8c56f6912..2aa2ef3e5541 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -585,19 +585,8 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
writeUint8(OS, Reloc.Type);
encodeULEB128(Reloc.Offset, OS);
encodeULEB128(Reloc.Index, OS);
- switch (Reloc.Type) {
- case wasm::R_WASM_MEMORY_ADDR_LEB:
- case wasm::R_WASM_MEMORY_ADDR_LEB64:
- case wasm::R_WASM_MEMORY_ADDR_SLEB:
- case wasm::R_WASM_MEMORY_ADDR_SLEB64:
- case wasm::R_WASM_MEMORY_ADDR_I32:
- case wasm::R_WASM_MEMORY_ADDR_I64:
- case wasm::R_WASM_FUNCTION_OFFSET_I32:
- case wasm::R_WASM_FUNCTION_OFFSET_I64:
- case wasm::R_WASM_SECTION_OFFSET_I32:
+ if (wasm::relocTypeHasAddend(Reloc.Type))
encodeSLEB128(Reloc.Addend, OS);
- break;
- }
}
}
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6110bda02406..93637c890c4f 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1454,6 +1454,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
+  // Try to run OpenMP optimizations; this is a quick no-op if no OpenMP
+  // metadata is present.
+ MPM.addPass(OpenMPOptPass());
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
@@ -1648,6 +1651,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
+ // Run the OpenMPOpt CGSCC pass again late.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
+
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 051655e1fed6..07d467305ae5 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1181,32 +1181,6 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
return true;
}
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled,
- bool DebugInfoCorrelate) {
- const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- Type *IntTy64 = Type::getInt64Ty(M.getContext());
- uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
- if (IsCS)
- ProfileVersion |= VARIANT_MASK_CSIR_PROF;
- if (InstrEntryBBEnabled)
- ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
- if (DebugInfoCorrelate)
- ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
- auto IRLevelVersionVariable = new GlobalVariable(
- M, IntTy64, true, GlobalValue::WeakAnyLinkage,
- Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
- IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
- Triple TT(M.getTargetTriple());
- if (TT.supportsCOMDAT()) {
- IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
- IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
- }
- return IRLevelVersionVariable;
-}
-
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
if (InstrProfileOutput.empty())
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 861ff61df510..138b1532d778 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -38,6 +38,28 @@
using namespace llvm;
+// Extracts the variant information from the top 8 bits in the version and
+// returns an enum specifying the variants present.
+static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
+ if (Version & VARIANT_MASK_IR_PROF) {
+ ProfileKind |= InstrProfKind::IR;
+ }
+ if (Version & VARIANT_MASK_CSIR_PROF) {
+ ProfileKind |= InstrProfKind::CS;
+ }
+ if (Version & VARIANT_MASK_INSTR_ENTRY) {
+ ProfileKind |= InstrProfKind::BB;
+ }
+ if (Version & VARIANT_MASK_BYTE_COVERAGE) {
+ ProfileKind |= InstrProfKind::SingleByteCoverage;
+ }
+ if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
+ ProfileKind |= InstrProfKind::FunctionEntryOnly;
+ }
+ return ProfileKind;
+}
+
static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine &Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
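
Annotation: getProfileKindFromVersion accumulates flags with |= on an enum class, which only compiles because InstrProfKind is declared as a bitmask enum (LLVM typically does this with LLVM_MARK_AS_BITMASK_ENUM). A self-contained sketch of the operators such a declaration supplies, with hypothetical flags:

#include <cstdint>

enum class Flags : uint8_t {
  None = 0,
  IRLike = 1 << 0, // hypothetical stand-ins for the InstrProfKind bits
  CSLike = 1 << 1,
};

constexpr Flags operator|(Flags L, Flags R) {
  return static_cast<Flags>(static_cast<uint8_t>(L) | static_cast<uint8_t>(R));
}
constexpr Flags operator&(Flags L, Flags R) {
  return static_cast<Flags>(static_cast<uint8_t>(L) & static_cast<uint8_t>(R));
}
constexpr Flags operator~(Flags F) {
  return static_cast<Flags>(~static_cast<uint8_t>(F) & 0x3);
}
inline Flags &operator|=(Flags &L, Flags R) { return L = L | R; }
inline Flags &operator&=(Flags &L, Flags R) { return L = L & R; }
// Accumulate with |=, clear with &= ~Bit, test with (K & Bit) != None --
// the same shapes the reader code above uses on InstrProfKind.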
@@ -154,30 +176,24 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
// with a leading ':' will be reported an error format.
Error TextInstrProfReader::readHeader() {
Symtab.reset(new InstrProfSymtab());
- bool IsIRInstr = false;
- bool IsEntryFirst = false;
- bool IsCS = false;
while (Line->startswith(":")) {
StringRef Str = Line->substr(1);
if (Str.equals_insensitive("ir"))
- IsIRInstr = true;
+ ProfileKind |= InstrProfKind::IR;
else if (Str.equals_insensitive("fe"))
- IsIRInstr = false;
+ ProfileKind |= InstrProfKind::FE;
else if (Str.equals_insensitive("csir")) {
- IsIRInstr = true;
- IsCS = true;
+ ProfileKind |= InstrProfKind::IR;
+ ProfileKind |= InstrProfKind::CS;
} else if (Str.equals_insensitive("entry_first"))
- IsEntryFirst = true;
+ ProfileKind |= InstrProfKind::BB;
else if (Str.equals_insensitive("not_entry_first"))
- IsEntryFirst = false;
+ ProfileKind &= ~InstrProfKind::BB;
else
return error(instrprof_error::bad_header);
++Line;
}
- IsIRLevelProfile = IsIRInstr;
- InstrEntryBBEnabled = IsEntryFirst;
- HasCSIRLevelProfile = IsCS;
return success();
}
@@ -304,6 +320,11 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
}
template <class IntPtrT>
+InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
+ return getProfileKindFromVersion(Version);
+}
+
+template <class IntPtrT>
bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
if (DataBuffer.getBufferSize() < sizeof(uint64_t))
return false;
@@ -485,9 +506,15 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
Record.Counts.clear();
Record.Counts.reserve(NumCounters);
for (uint32_t I = 0; I < NumCounters; I++) {
- const auto *CounterValue = reinterpret_cast<const uint64_t *>(
- CountersStart + CounterBaseOffset + I * getCounterTypeSize());
- Record.Counts.push_back(swap(*CounterValue));
+ const char *Ptr =
+ CountersStart + CounterBaseOffset + I * getCounterTypeSize();
+ if (hasSingleByteCoverage()) {
+ // A value of zero signifies the block is covered.
+ Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
+ } else {
+ const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
+ Record.Counts.push_back(swap(*CounterValue));
+ }
}
return success();
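
Annotation: the counter stride now depends on the profile kind — single-byte coverage packs one byte per counter and encodes "executed" as zero, while the classic format stores 64-bit counts. A hedged sketch of the decode step in isolation (it ignores the byte-order and offset handling the real reader does):

#include <cstdint>
#include <vector>

// Decode a raw counter region; assumes suitably aligned input for the
// 64-bit case, which this sketch does not defend against.
std::vector<uint64_t> decodeCounters(const char *Start, uint32_t N,
                                     bool SingleByteCoverage) {
  std::vector<uint64_t> Counts;
  size_t Stride = SingleByteCoverage ? 1 : sizeof(uint64_t);
  for (uint32_t I = 0; I < N; ++I) {
    const char *Ptr = Start + I * Stride;
    if (SingleByteCoverage)
      Counts.push_back(*Ptr == 0 ? 1 : 0); // zero byte == block was covered
    else
      Counts.push_back(*reinterpret_cast<const uint64_t *>(Ptr));
  }
  return Counts;
}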
@@ -718,6 +745,11 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
RecordIterator = HashTable->data_begin();
}
+template <typename HashTableImpl>
+InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
+ return getProfileKindFromVersion(FormatVersion);
+}
+
namespace {
/// A remapper that does not apply any remappings.
class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6628eea80640..8ded1c0426e5 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -166,9 +166,8 @@ public:
} // end namespace llvm
-InstrProfWriter::InstrProfWriter(bool Sparse, bool InstrEntryBBEnabled)
- : Sparse(Sparse), InstrEntryBBEnabled(InstrEntryBBEnabled),
- InfoObj(new InstrProfRecordWriterTrait()) {}
+InstrProfWriter::InstrProfWriter(bool Sparse)
+ : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -303,14 +302,16 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
IndexedInstrProf::Header Header;
Header.Magic = IndexedInstrProf::Magic;
Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
- if (ProfileKind == PF_IRLevel)
- Header.Version |= VARIANT_MASK_IR_PROF;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
Header.Version |= VARIANT_MASK_IR_PROF;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
Header.Version |= VARIANT_MASK_CSIR_PROF;
- }
- if (InstrEntryBBEnabled)
+ if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
Header.Version |= VARIANT_MASK_INSTR_ENTRY;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
+ Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
+ Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
@@ -337,7 +338,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(0);
uint64_t CSSummaryOffset = 0;
uint64_t CSSummarySize = 0;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
CSSummaryOffset = OS.tell();
CSSummarySize = SummarySize / sizeof(uint64_t);
for (unsigned I = 0; I < CSSummarySize; I++)
@@ -358,7 +359,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// For Context Sensitive summary.
std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
setSummary(TheCSSummary.get(), *CSPS);
@@ -470,11 +471,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
}
Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
- if (ProfileKind == PF_IRLevel)
- OS << "# IR level Instrumentation Flag\n:ir\n";
- else if (ProfileKind == PF_IRLevelWithCS)
+ // Check CS first since it implies an IR level profile.
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
OS << "# CSIR level Instrumentation Flag\n:csir\n";
- if (InstrEntryBBEnabled)
+ else if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
+ OS << "# IR level Instrumentation Flag\n:ir\n";
+
+ if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
OS << "# Always instrument the function entry block\n:entry_first\n";
InstrProfSymtab Symtab;
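
Annotation: writeText checks the CS bit before the IR bit because, at the text level, ":csir" subsumes ":ir" — and readHeader's "csir" branch earlier in this diff restores both bits, so the round-trip is lossless. With the CS and BB flags set, the emitted header is exactly the strings shown in the code above:

# CSIR level Instrumentation Flag
:csir
# Always instrument the function entry block
:entry_first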
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp
index 3d586a247962..d74fff4ca7c5 100644
--- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp
+++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/Remarks/BitstreamRemarkParser.h"
#include "BitstreamRemarkParser.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h
index 0e40e5d66e00..988bc30da6e1 100644
--- a/llvm/lib/Remarks/BitstreamRemarkParser.h
+++ b/llvm/lib/Remarks/BitstreamRemarkParser.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Remarks/BitstreamRemarkParser.h"
-#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkParser.h"
#include <cstdint>
@@ -24,6 +23,9 @@
namespace llvm {
namespace remarks {
+
+struct Remark;
+
/// Parses and holds the state of the latest parsed remark.
struct BitstreamRemarkParser : public RemarkParser {
/// The buffer to parse.
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index dd1bba3d1762..62f80918ea1d 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -12,10 +12,12 @@
#include "llvm/Remarks/RemarkLinker.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index c5c3d0badd3e..f36767efcbf4 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -15,7 +15,6 @@
#include "BitstreamRemarkParser.h"
#include "YAMLRemarkParser.h"
#include "llvm-c/Remarks.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CBindingWrapping.h"
using namespace llvm;
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
index df3b908f4779..88b3003010d3 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
@@ -14,14 +14,12 @@
#define LLVM_REMARKS_YAML_REMARK_PARSER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 827e04f0b10f..9e965aa4f6c4 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/YAMLRemarkSerializer.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 908e56319025..9ba224cee0ca 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ARMAttributeParser.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ScopedPrinter.h"
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 9a4470289bcf..f6003b783245 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -211,6 +211,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd0d", "cortex-a77")
.Case("0xd41", "cortex-a78")
.Case("0xd44", "cortex-x1")
+ .Case("0xd4c", "cortex-x1c")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd40", "neoverse-v1")
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 6c59d8a7ef04..2b3395b669b8 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -461,15 +461,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
ISAInfo->Exts.erase(ExtName.str());
}
- ISAInfo->updateImplication();
- ISAInfo->updateFLen();
- ISAInfo->updateMinVLen();
- ISAInfo->updateMaxELen();
-
- if (Error Result = ISAInfo->checkDependency())
- return std::move(Result);
-
- return std::move(ISAInfo);
+ return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
}
llvm::Expected<std::unique_ptr<RISCVISAInfo>>
@@ -686,26 +678,18 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
}
}
- ISAInfo->updateImplication();
- ISAInfo->updateFLen();
- ISAInfo->updateMinVLen();
- ISAInfo->updateMaxELen();
-
- if (Error Result = ISAInfo->checkDependency())
- return std::move(Result);
-
- return std::move(ISAInfo);
+ return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
}
Error RISCVISAInfo::checkDependency() {
bool IsRv32 = XLen == 32;
- bool HasE = Exts.count("e") == 1;
- bool HasD = Exts.count("d") == 1;
- bool HasF = Exts.count("f") == 1;
- bool HasZve32x = Exts.count("zve32x") == 1;
- bool HasZve32f = Exts.count("zve32f") == 1;
- bool HasZve64d = Exts.count("zve64d") == 1;
- bool HasV = Exts.count("v") == 1;
+ bool HasE = Exts.count("e") != 0;
+ bool HasD = Exts.count("d") != 0;
+ bool HasF = Exts.count("f") != 0;
+ bool HasZve32x = Exts.count("zve32x") != 0;
+ bool HasZve32f = Exts.count("zve32f") != 0;
+ bool HasZve64d = Exts.count("zve64d") != 0;
+ bool HasV = Exts.count("v") != 0;
bool HasVector = HasZve32x || HasV;
bool HasZvl = MinVLen != 0;
@@ -739,12 +723,6 @@ Error RISCVISAInfo::checkDependency() {
errc::invalid_argument,
"zvl*b requires v or zve* extension to also be specified");
- // Could not implement Zve* extension and the V extension at the same time.
- if (HasZve32x && HasV)
- return createStringError(
- errc::invalid_argument,
- "It is illegal to specify the v extension with zve* extensions");
-
// Additional dependency checks.
// TODO: The 'q' extension requires rv64.
// TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
@@ -753,7 +731,8 @@ Error RISCVISAInfo::checkDependency() {
}
static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"};
-static const char *ImpliedExtsZfh[] = {"zfhmin"};
+static const char *ImpliedExtsZfhmin[] = {"f"};
+static const char *ImpliedExtsZfh[] = {"f"};
static const char *ImpliedExtsZve64d[] = {"zve64f"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
@@ -785,9 +764,11 @@ struct ImpliedExtsEntry {
bool operator<(StringRef Other) const { return Name < Other; }
};
+// Note: The table needs to be sorted by name.
static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"v"}, {ImpliedExtsV}},
{{"zfh"}, {ImpliedExtsZfh}},
+ {{"zfhmin"}, {ImpliedExtsZfhmin}},
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
@@ -810,8 +791,8 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
};
void RISCVISAInfo::updateImplication() {
- bool HasE = Exts.count("e") == 1;
- bool HasI = Exts.count("i") == 1;
+ bool HasE = Exts.count("e") != 0;
+ bool HasI = Exts.count("i") != 0;
// If not in e extension and i extension does not exist, i extension is
// implied
@@ -919,3 +900,15 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const {
}
return FeatureVector;
}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) {
+ ISAInfo->updateImplication();
+ ISAInfo->updateFLen();
+ ISAInfo->updateMinVLen();
+ ISAInfo->updateMaxELen();
+
+ if (Error Result = ISAInfo->checkDependency())
+ return std::move(Result);
+ return std::move(ISAInfo);
+}
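
Annotation: both parse entry points now funnel through postProcessAndChecking, so the implication/FLen/MinVLen/MaxELen updates and the dependency check can no longer drift apart between parseFeatures and parseArchString. Callers consume the Expected result in the usual LLVM way — a sketch, assuming the two-argument parseArchString shown in this diff:

#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"

void demo() {
  auto ParseResult = llvm::RISCVISAInfo::parseArchString(
      "rv64imafdc", /*EnableExperimentalExtension=*/false);
  if (!ParseResult) {
    // Error path: every Expected must be checked or taken.
    llvm::logAllUnhandledErrors(ParseResult.takeError(), llvm::errs(),
                                "riscv: ");
    return;
  }
  std::unique_ptr<llvm::RISCVISAInfo> ISAInfo = std::move(*ParseResult);
}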
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 5ce41c987029..1d61f2bf7525 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -15,7 +15,7 @@
#include "DebugOptions.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 20dea8c302a5..a9afcc9db96a 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -7,14 +7,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstring>
diff --git a/llvm/lib/Support/Valgrind.cpp b/llvm/lib/Support/Valgrind.cpp
index 3cf41faeb55d..5994656c5c03 100644
--- a/llvm/lib/Support/Valgrind.cpp
+++ b/llvm/lib/Support/Valgrind.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include <stddef.h>
#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
-#include <cstddef>
#if HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
diff --git a/llvm/lib/Support/Windows/Host.inc b/llvm/lib/Support/Windows/Host.inc
index 5583db909045..fa6b00f19b9a 100644
--- a/llvm/lib/Support/Windows/Host.inc
+++ b/llvm/lib/Support/Windows/Host.inc
@@ -10,6 +10,9 @@
//
//===----------------------------------------------------------------------===//
+// We need to include config.h here because LLVM_DEFAULT_TARGET_TRIPLE is not
+// defined in llvm-config.h if it is unset.
+#include "llvm/Config/config.h"
#include "llvm/Support/Windows/WindowsSupport.h"
#include <cstdio>
#include <string>
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index e4b747b68bea..69d4fe96bee8 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/llvm/lib/TableGen/DetailedRecordsBackend.cpp
index e181f79b903d..500aa4c78225 100644
--- a/llvm/lib/TableGen/DetailedRecordsBackend.cpp
+++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp
@@ -13,15 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+#include <memory>
#include <string>
#include <utility>
diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp
index 8ddfd9f04524..e38903910275 100644
--- a/llvm/lib/TableGen/JSONBackend.cpp
+++ b/llvm/lib/TableGen/JSONBackend.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
+#include "llvm/TableGen/Record.h"
#define DEBUG_TYPE "json-emitter"
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 762255b43136..1d5f130737ee 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -16,7 +16,6 @@
#include "llvm/TableGen/Main.h"
#include "TGParser.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -24,7 +23,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
-#include <cstdio>
#include <system_error>
using namespace llvm;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index eb7d4838a9f6..58d8c9936896 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -10,16 +10,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/Record.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -29,11 +28,10 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
#include <cassert>
#include <cstdint>
-#include <memory>
#include <map>
+#include <memory>
#include <string>
#include <utility>
#include <vector>
@@ -2289,8 +2287,8 @@ bool RecordVal::setValue(Init *V, SMLoc NewLoc) {
return false;
}
-#include "llvm/TableGen/Record.h"
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#include "llvm/TableGen/Record.h"
LLVM_DUMP_METHOD void RecordVal::dump() const { errs() << *this; }
#endif
diff --git a/llvm/lib/TableGen/SetTheory.cpp b/llvm/lib/TableGen/SetTheory.cpp
index f7ba75243c15..3db46aae6d96 100644
--- a/llvm/lib/TableGen/SetTheory.cpp
+++ b/llvm/lib/TableGen/SetTheory.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/SetTheory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
@@ -21,7 +21,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/SetTheory.h"
#include <algorithm>
#include <cstdint>
#include <string>
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 3709a375ed1b..90646a0c642d 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -21,7 +21,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/SourceMgr.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
index 4ce88e003e65..0ba00c8d8ab1 100644
--- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
+++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -10,22 +10,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
-#include <set>
-#include <string>
-#include <vector>
#define DEBUG_TYPE "skeleton-emitter"
+namespace llvm {
+class RecordKeeper;
+class raw_ostream;
+} // namespace llvm
+
using namespace llvm;
namespace {
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index b87468d5c8de..9a04b28a8b8f 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -972,6 +972,10 @@ def ProcessorFeatures {
list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureRCPC, FeaturePerfMon,
FeatureSPE, FeatureFullFP16, FeatureDotProd];
+ list<SubtargetFeature> X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureRCPC, FeaturePerfMon,
+ FeatureSPE, FeatureFullFP16, FeatureDotProd,
+ FeaturePAuth];
list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
@@ -1086,6 +1090,8 @@ def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
[TuneX1]>;
+def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C,
+ [TuneX1]>;
def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 85a9c04a3fef..b54a0eaba7d1 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -95,6 +95,8 @@ public:
void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
+ void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
.addImm(Size == 4 ? 0 : 2));
}
+void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ assert(STI->hasMOPS());
+ assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo);
+
+ const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
+ if (Opcode == AArch64::MOPSMemoryCopyPseudo)
+ return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
+ if (Opcode == AArch64::MOPSMemoryMovePseudo)
+ return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
+ if (Opcode == AArch64::MOPSMemorySetPseudo)
+ return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
+ if (Opcode == AArch64::MOPSMemorySetTaggingPseudo)
+ return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
+ llvm_unreachable("Unhandled memory operation pseudo");
+ }();
+ const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
+ Opcode == AArch64::MOPSMemorySetTaggingPseudo;
+
+ for (auto Op : Ops) {
+ int i = 0;
+ auto MCIB = MCInstBuilder(Op);
+ // Destination registers
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ if (!IsSet)
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ // Input registers
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+
+ EmitToStreamer(OutStreamer, MCIB);
+ }
+}
+
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
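
Annotation: LowerMOPS expands each memory-operation pseudo into its three-instruction sequence (e.g. CPYFP/CPYFM/CPYFE), re-reading the pseudo's operand list once per instruction since all three take the same operands. The instruction triple itself is chosen with an immediately invoked lambda, which keeps Ops const despite the branchy selection; a self-contained sketch of that pattern with placeholder values:

#include <array>

static std::array<unsigned, 3> pickTriple(unsigned Opcode) {
  const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
    if (Opcode == 0) // stand-in for MOPSMemoryCopyPseudo etc.
      return {1, 2, 3};
    return {4, 5, 6};
  }(); // trailing (): the lambda runs immediately, initializing a const
  return Ops;
}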
@@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
emitFMov0(*MI);
return;
+ case AArch64::MOPSMemoryCopyPseudo:
+ case AArch64::MOPSMemoryMovePseudo:
+ case AArch64::MOPSMemorySetPseudo:
+ case AArch64::MOPSMemorySetTaggingPseudo:
+ LowerMOPS(*OutStreamer, *MI);
+ return;
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*OutStreamer, SM, *MI);
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 109b739528bf..b0f739cc26e6 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -709,20 +709,24 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
- // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
- // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
- // in between.
+ // Expand CALL_RVMARKER pseudo to:
+ // - a branch to the call target, followed by
+ // - the special `mov x29, x29` marker, and
+ // - another branch, to the runtime function
+  // Mark the sequence as a bundle, to avoid passes moving other code in
+  // between.
MachineInstr &MI = *MBBI;
MachineInstr *OriginalCall;
- MachineOperand &CallTarget = MI.getOperand(0);
+ MachineOperand &RVTarget = MI.getOperand(0);
+ MachineOperand &CallTarget = MI.getOperand(1);
assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
"invalid operand for regular call");
+ assert(RVTarget.isGlobal() && "invalid operand for attached call");
unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
OriginalCall->addOperand(CallTarget);
- unsigned RegMaskStartIdx = 1;
+ unsigned RegMaskStartIdx = 2;
// Skip register arguments. Those are added during ISel, but are not
// needed for the concrete branch.
while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
@@ -736,17 +740,22 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
llvm::drop_begin(MI.operands(), RegMaskStartIdx))
OriginalCall->addOperand(MO);
- auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
.addReg(AArch64::FP, RegState::Define)
.addReg(AArch64::XZR)
.addReg(AArch64::FP)
- .addImm(0)
+ .addImm(0);
+
+ auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
+ .add(RVTarget)
.getInstr();
+
if (MI.shouldUpdateCallSiteInfo())
- MBB.getParent()->moveCallSiteInfo(&MI, Marker);
+ MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
+
MI.eraseFromParent();
finalizeBundle(MBB, OriginalCall->getIterator(),
- std::next(Marker->getIterator()));
+ std::next(RVCall->getIterator()));
return true;
}
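
Annotation: the pseudo now carries the objc runtime function as operand 0 (RVTarget), matching the Verifier change earlier in this diff that makes the bundle's function argument mandatory. The expansion therefore emits three instructions instead of two and bundles them all; a comment-only sketch of the resulting sequence, with a hypothetical call target:

// After expansion the bundle contains, in order (sketch):
//   bl _foo                  ; OriginalCall, the user's call
//   mov x29, x29             ; ORRXrs marker the ObjC runtime keys on
//   bl _objc_retain...Value  ; RVTarget, now an explicit branch
// finalizeBundle(MBB, OriginalCall->getIterator(),
//                std::next(RVCall->getIterator()))
// glues the three together so later passes cannot schedule code between
// the call, the marker, and the runtime call.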
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a26bbc77f248..c539c8617d99 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
- MaxStoresPerMemset = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemsetOptSize : 32;
+ MaxStoresPerMemset =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
- MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemcpyOptSize : 16;
+ MaxStoresPerMemcpy =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
- MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+ MaxStoresPerMemmoveOptSize = 4;
+ MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
- MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
- ? MaxLoadsPerMemcmpOptSize : 8;
+ MaxLoadsPerMemcmp =
+ Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
@@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
+ if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
+ // Only required for llvm.aarch64.mops.memset.tag
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
@@ -2201,7 +2208,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
- MAKE_CASE(AArch64ISD::PFALSE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -2268,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
+ MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
+ MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
}
#undef MAKE_CASE
return nullptr;
@@ -3746,6 +3756,10 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
+ // Bitcasts between f16 and bf16 are legal.
+ if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
+ return Op;
+
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
@@ -4056,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_mops_memset_tag: {
+ auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ SDValue Chain = Node->getChain();
+ SDValue Dst = Op.getOperand(2);
+ SDValue Val = Op.getOperand(3);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+ SDValue Size = Op.getOperand(4);
+ auto Alignment = Node->getMemOperand()->getAlign();
+ bool IsVol = Node->isVolatile();
+ auto DstPtrInfo = Node->getPointerInfo();
+
+ const auto &SDI =
+ static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
+ SDValue MS =
+ SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
+ Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
+
+ // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
+ // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
+ // LowerOperationWrapper will complain that the number of results has
+ // changed.
+ return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
+ }
+ }
+}
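+
+// As a usage sketch, assuming the intrinsic takes a destination pointer, an
+// i8 value, and an i64 size and returns the updated destination (which
+// matches how the operands are unpacked above), the IR being custom-lowered
+// here looks like:
+//
+//   %dst.new = call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %val, i64 %size)
+//
+// The returned pointer corresponds to MS.getValue(0) (the Dst writeback); the
+// Size writeback has no IR-level counterpart, hence the MERGE_VALUES above.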
+
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -5123,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
+ case ISD::INTRINSIC_W_CHAIN:
+ return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
@@ -6475,12 +6524,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
- // be expanded to the call, directly followed by a special marker sequence.
- // Use the CALL_RVMARKER to do that.
+ // be expanded to the call, directly followed by a special marker sequence and
+ // a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
+
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
}
// Returns a chain and a flag for retval copy to use.
@@ -9985,8 +10040,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ // We can handle the zero case during isel.
if (ConstVal->isZero())
- return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
+ return Op;
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
}
@@ -11869,6 +11925,19 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ Value *Dst = I.getArgOperand(0);
+ Value *Val = I.getArgOperand(1);
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(Val->getType());
+ Info.ptrVal = Dst;
+ Info.offset = 0;
+ Info.align = I.getParamAlign(0).valueOrOne();
+ Info.flags = MachineMemOperand::MOStore;
+ // The size of the memory being operated on is unknown at this point
+ Info.size = MemoryLocation::UnknownSize;
+ return true;
+ }
default:
break;
}
@@ -15092,7 +15161,7 @@ static bool isAllInactivePredicate(SDValue N) {
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
- return N.getOpcode() == AArch64ISD::PFALSE;
+ return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
@@ -15393,6 +15462,52 @@ static SDValue performIntrinsicCombine(SDNode *N,
return SDValue();
}
+static bool isCheapToExtend(const SDValue &N) {
+ unsigned OC = N->getOpcode();
+ return OC == ISD::LOAD || OC == ISD::MLOAD ||
+ ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static SDValue
+performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // If we have (sext (setcc A B)) and A and B are cheap to extend,
+ // we can move the sext into the arguments and have the same result. For
+ // example, if A and B are both loads, we can make those extending loads and
+ // avoid an extra instruction. This pattern appears often in VLS code
+ // generation where the inputs to the setcc have a different size to the
+ // instruction that wants to use the result of the setcc.
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC);
+ const SDValue SetCC = N->getOperand(0);
+
+ const SDValue CCOp0 = SetCC.getOperand(0);
+ const SDValue CCOp1 = SetCC.getOperand(1);
+ if (!CCOp0->getValueType(0).isInteger() ||
+ !CCOp1->getValueType(0).isInteger())
+ return SDValue();
+
+ ISD::CondCode Code =
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
+
+ ISD::NodeType ExtType =
+ isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if (isCheapToExtend(SetCC.getOperand(0)) &&
+ isCheapToExtend(SetCC.getOperand(1))) {
+ const SDValue Ext1 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
+ const SDValue Ext2 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
+
+ return DAG.getSetCC(
+ SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
+ }
+
+ return SDValue();
+}
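+
+// In DAG terms, the rewrite performed here is (a sketch):
+//
+//   (sign_extend (setcc A, B, cc))  -->  (setcc (ext A), (ext B), cc)
+//
+// where ext is sign_extend for signed condition codes and zero_extend
+// otherwise, so that when A and B are loads the extends can later fold into
+// extending loads.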
+
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -15411,6 +15526,12 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
+
+ if (N->getValueType(0).isFixedLengthVector() &&
+ N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC)
+ return performSignExtendSetCCCombine(N, DCI, DAG);
+
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ca6c70297c0b..2138c0ffe70a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -323,7 +323,6 @@ enum NodeType : unsigned {
INSR,
PTEST,
PTRUE,
- PFALSE,
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
@@ -453,6 +452,12 @@ enum NodeType : unsigned {
LDP,
STP,
STNP,
+
+ // Memory Operations
+ MOPS_MEMSET,
+ MOPS_MEMSET_TAGGING,
+ MOPS_MEMCOPY,
+ MOPS_MEMMOVE,
};
} // end namespace AArch64ISD
@@ -890,6 +895,7 @@ private:
SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 93c17133c845..a9191924129c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -93,9 +93,18 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// before the assembly printer.
unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
+
+ // The size should preferably be set in
+ // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
+ // The cases below handle instructions of variable size.
switch (Desc.getOpcode()) {
default:
- // Anything not explicitly designated otherwise is a normal 4-byte insn.
+ if (Desc.getSize())
+ return Desc.getSize();
+
+ // Anything not explicitly designated otherwise (i.e. pseudo-instructions
+ // with a fixed constant size but not specified in the .td file) is a normal
+ // 4-byte insn.
NumBytes = 4;
break;
case TargetOpcode::STACKMAP:
@@ -115,29 +124,9 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (NumBytes == 0)
NumBytes = 4;
break;
- case AArch64::TLSDESC_CALLSEQ:
- // This gets lowered to an instruction sequence which takes 16 bytes
- NumBytes = 16;
- break;
- case AArch64::SpeculationBarrierISBDSBEndBB:
- // This gets lowered to 2 4-byte instructions.
- NumBytes = 8;
- break;
- case AArch64::SpeculationBarrierSBEndBB:
- // This gets lowered to 1 4-byte instructions.
- NumBytes = 4;
- break;
- case AArch64::JumpTableDest32:
- case AArch64::JumpTableDest16:
- case AArch64::JumpTableDest8:
- NumBytes = 12;
- break;
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
- case AArch64::StoreSwiftAsyncContext:
- NumBytes = 20;
- break;
case TargetOpcode::BUNDLE:
NumBytes = getInstBundleLength(MI);
break;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c8a697c8b82f..83bf89ff97c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -780,6 +780,7 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
@@ -815,8 +816,12 @@ let hasSideEffects = 1, isCodeGenOnly = 1 in {
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions.
+ let Size = 8 in
def SpeculationBarrierISBDSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
+ // This gets lowered to a 4-byte instruction.
+ let Size = 4 in
def SpeculationBarrierSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
}
@@ -2324,8 +2329,8 @@ def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
-def : Pat<(AArch64call_rvmarker GPR64:$Rn),
- (BLR_RVMARKER GPR64:$Rn)>,
+def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
+ (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -2356,7 +2361,8 @@ def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
-let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+// This gets lowered to an instruction sequence which takes 16 bytes
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
@@ -7546,6 +7552,9 @@ def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
+
let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -8330,26 +8339,67 @@ let Predicates = [HasLS64] in {
}
let Predicates = [HasMOPS] in {
- defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
- defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
- defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
+ let Defs = [NZCV] in {
+ defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
+
+ defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
+
+ defm SETP : MOPSMemorySetInsns<0b00, "setp">;
+ }
+ let Uses = [NZCV] in {
+ defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
+ defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
- defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
- defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
- defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
+ defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
+ defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
- defm SETP : MOPSMemorySetInsns<0b00, "setp">;
- defm SETM : MOPSMemorySetInsns<0b01, "setm">;
- defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+ defm SETM : MOPSMemorySetInsns<0b01, "setm">;
+ defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+ }
}
let Predicates = [HasMOPS, HasMTE] in {
- defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
- defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
- // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
- defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+ let Defs = [NZCV] in {
+ defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
+ }
+ let Uses = [NZCV] in {
+ defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
+ // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
+ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+ }
+}
+
+// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
+// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
+def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
+def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
+def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
+def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
+def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
+
+// MOPS operations always contain three 4-byte instructions
+let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
+ let mayLoad = 1 in {
+ def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+ [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+ def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+ [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+ }
+ let mayLoad = 0 in {
+ def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+ [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
+ }
+}
+let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
+ def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+ [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
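
Each pseudo above is later expanded (see LowerMOPS in the AsmPrinter change earlier in this patch) into the corresponding prologue/main/epilogue triple; e.g. the copy pseudo becomes, roughly (register numbers illustrative):

    cpyfp [x0]!, [x1]!, x2!   // prologue: sets NZCV
    cpyfm [x0]!, [x1]!, x2!   // main: reads NZCV
    cpyfe [x0]!, [x1]!, x2!   // epilogue: reads NZCV

which is why Size = 12 holds for all of them.
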
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
+// This gets lowered into an instruction sequence of 20 bytes
+let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 73a680465f6f..1d162610de9c 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -292,7 +292,13 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
]>;
-def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
+def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
+
+def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
+ [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))),
+ (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))),
+ (and node:$op1, (xor node:$op2, (SVEAllActive))),
+ (AArch64bic_node node:$op1, node:$op2)]>;
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -734,14 +740,14 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
- defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
- defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
+ defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>;
+ defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
- defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
+ defm SEL_PPPP : sve_int_pred_log_v2<0b0011, "sel", vselect, or>;
defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
- defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
+ defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index d2d84b2a3f6d..893269c1a7ef 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -15,15 +15,95 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
+SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
+ SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst,
+ SDValue SrcOrValue, SDValue Size,
+ Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+
+ // Get the constant size of the copy/set.
+ uint64_t ConstSize = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(Size))
+ ConstSize = C->getZExtValue();
+
+ const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET ||
+ SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING;
+
+ const auto MachineOpcode = [&]() {
+ switch (SDOpcode) {
+ case AArch64ISD::MOPS_MEMSET:
+ return AArch64::MOPSMemorySetPseudo;
+ case AArch64ISD::MOPS_MEMSET_TAGGING:
+ return AArch64::MOPSMemorySetTaggingPseudo;
+ case AArch64ISD::MOPS_MEMCOPY:
+ return AArch64::MOPSMemoryCopyPseudo;
+ case AArch64ISD::MOPS_MEMMOVE:
+ return AArch64::MOPSMemoryMovePseudo;
+ default:
+ llvm_unreachable("Unhandled MOPS ISD Opcode");
+ }
+ }();
+
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (!IsSet)
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ auto *DstOp =
+ MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment);
+ auto *SrcOp =
+ MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment);
+
+ if (IsSet) {
+ // Extend value to i64 if required
+ if (SrcOrValue.getValueType() != MVT::i64)
+ SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue);
+ SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
+ const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other};
+ MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+ DAG.setNodeMemRefs(Node, {DstOp});
+ return SDValue(Node, 2);
+ } else {
+ SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
+ const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
+ MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+ DAG.setNodeMemRefs(Node, {DstOp, SrcOp});
+ return SDValue(Node, 3);
+ }
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS())
+ return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+ return SDValue();
+}
+
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+
+ if (STI.hasMOPS()) {
+ return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
+ }
+
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
- const AArch64Subtarget &STI =
- DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const char *bzeroName =
(V && V->isZero())
? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
@@ -55,6 +135,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
return SDValue();
}
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS()) {
+ return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+ }
+ return SDValue();
+}
+
static const int kSetTagLoopThreshold = 176;
static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 7d53bd456975..47fe3bf7dcf5 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,11 +19,30 @@ namespace llvm {
class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
public:
+ SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG,
+ const SDLoc &DL, SDValue Chain, SDValue Dst,
+ SDValue SrcOrValue, SDValue Size, Align Alignment,
+ bool isVolatile, MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
+ SDValue
+ EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
MachinePointerInfo DstPtrInfo,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index a4f4b8582182..8a7e20237271 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -99,6 +99,7 @@ void AArch64Subtarget::initializeProperties() {
case CortexA78C:
case CortexR82:
case CortexX1:
+ case CortexX1C:
PrefFunctionLogAlignment = 4;
break;
case CortexA510:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 3e3c0f6aba15..7b2bbad30f85 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -63,6 +63,7 @@ public:
CortexA710,
CortexR82,
CortexX1,
+ CortexX1C,
CortexX2,
ExynosM3,
Falkor,
@@ -217,7 +218,6 @@ protected:
bool HasETE = false;
bool HasTRBE = false;
bool HasBRBE = false;
- bool HasPAUTH = false;
bool HasSPE_EEF = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
@@ -510,7 +510,6 @@ public:
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
- bool hasPAUTH() const { return HasPAUTH; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4d666a0a3c2..b2ffdf949d8b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1886,14 +1886,21 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
m_Value())))
VecPred = CurrentPred;
}
- // Check if we have a compare/select chain that can be lowered using CMxx &
- // BFI pair.
- if (CmpInst::isIntPredicate(VecPred)) {
- static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
- MVT::v8i16, MVT::v2i32, MVT::v4i32,
- MVT::v2i64};
+ // Check if we have a compare/select chain that can be lowered using
+ // a (F)CMxx & BFI pair.
+ if (CmpInst::isIntPredicate(VecPred) || VecPred == CmpInst::FCMP_OLE ||
+ VecPred == CmpInst::FCMP_OLT || VecPred == CmpInst::FCMP_OGT ||
+ VecPred == CmpInst::FCMP_OGE || VecPred == CmpInst::FCMP_OEQ ||
+ VecPred == CmpInst::FCMP_UNE) {
+ static const auto ValidMinMaxTys = {
+ MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
+ static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
+
auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
- if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
+ (ST->hasFullFP16() &&
+ any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
return LT.first;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1f546ad50d57..703e356f016d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -192,6 +192,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
@@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMCPY_INLINE:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
+ return selectMOPS(I, MRI);
}
return false;
@@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
return false;
}
+bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
+ MachineRegisterInfo &MRI) {
+ unsigned Mopcode;
+ switch (GI.getOpcode()) {
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMCPY_INLINE:
+ Mopcode = AArch64::MOPSMemoryCopyPseudo;
+ break;
+ case TargetOpcode::G_MEMMOVE:
+ Mopcode = AArch64::MOPSMemoryMovePseudo;
+ break;
+ case TargetOpcode::G_MEMSET:
+ // For tagged memset see llvm.aarch64.mops.memset.tag
+ Mopcode = AArch64::MOPSMemorySetPseudo;
+ break;
+ }
+
+ auto &DstPtr = GI.getOperand(0);
+ auto &SrcOrVal = GI.getOperand(1);
+ auto &Size = GI.getOperand(2);
+
+ // Create copies of the registers that can be clobbered.
+ const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
+ const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
+ const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
+
+ const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
+ const auto &SrcValRegClass =
+ IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
+
+ // Constrain to specific registers
+ RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
+ RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
+ RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
+
+ MIB.buildCopy(DstPtrCopy, DstPtr);
+ MIB.buildCopy(SrcValCopy, SrcOrVal);
+ MIB.buildCopy(SizeCopy, Size);
+
+ // The new instruction uses the copied registers because it must update them.
+ // The defs are not used since they don't exist in G_MEM*, but they are
+ // still tied.
+ // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
+ Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
+ Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ if (IsSet) {
+ MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
+ {DstPtrCopy, SizeCopy, SrcValCopy});
+ } else {
+ Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
+ MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
+ {DstPtrCopy, SrcValCopy, SizeCopy});
+ }
+
+ GI.eraseFromParent();
+ return true;
+}
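+
+// A rough before/after sketch in MIR for the set case (virtual register
+// names are illustrative):
+//
+//   ; before
+//   G_MEMSET %dst(p0), %val(s64), %size(s64), 0
+//   ; after
+//   %d:gpr64common = COPY %dst
+//   %v:gpr64       = COPY %val
+//   %s:gpr64       = COPY %size
+//   %d_wb:gpr64common, %s_wb:gpr64 = MOPSMemorySetPseudo %d, %s, %v
+//
+// Note the size/value swap on the pseudo, matching the comment above.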
+
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
break;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ // Transform
+ // %dst:gpr(p0) = \
+ // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
+ // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
+ // where %dst is updated, into
+ // %Rd:GPR64common, %Rn:GPR64 = \
+ // MOPSMemorySetTaggingPseudo \
+ // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
+ // where Rd and Rn are tied.
+ // It is expected that %val has been extended to s64 in legalization.
+ // Note that the order of the size/value operands is swapped.
+
+ Register DstDef = I.getOperand(0).getReg();
+ // I.getOperand(1) is the intrinsic function
+ Register DstUse = I.getOperand(2).getReg();
+ Register ValUse = I.getOperand(3).getReg();
+ Register SizeUse = I.getOperand(4).getReg();
+
+ // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
+ // Therefore an additional virtual register is requried for the updated size
+ // operand. This value is not accessible via the semantics of the intrinsic.
+ Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+ auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
+ {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
+ Memset.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
+ break;
+ }
}
I.eraseFromParent();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e8894e7933d6..e9df7e001d38 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
- getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
- .libcall();
+ if (ST.hasMOPS()) {
+ // G_BZERO is not supported. Currently it is only emitted by
+ // the PreLegalizerCombiner for G_MEMSET with a zero constant.
+ getActionDefinitionsBuilder(G_BZERO).unsupported();
+
+ getActionDefinitionsBuilder(G_MEMSET)
+ .legalForCartesianProduct({p0}, {s64}, {s64})
+ .customForCartesianProduct({p0}, {s8}, {s64})
+ .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
+ .legalForCartesianProduct({p0}, {p0}, {s64})
+ .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+ // G_MEMCPY_INLINE does not have a tailcall immediate
+ getActionDefinitionsBuilder(G_MEMCPY_INLINE)
+ .legalForCartesianProduct({p0}, {p0}, {s64});
+
+ } else {
+ getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
+ .libcall();
+ }
// FIXME: Legal types are only legal with NEON.
getActionDefinitionsBuilder(G_ABS)
@@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeAtomicCmpxchg128(MI, MRI, Helper);
case TargetOpcode::G_CTTZ:
return legalizeCTTZ(MI, Helper);
+ case TargetOpcode::G_BZERO:
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ return legalizeMemOps(MI, Helper);
}
llvm_unreachable("expected switch to return");
@@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ // Widen the value to 64 bits with an any-extend
+ MachineIRBuilder MIB(MI);
+ auto &Value = MI.getOperand(3);
+ Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+ Value.setReg(ZExtValueReg);
+ return true;
+ }
}
return true;
@@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+
+bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+
+ // The tagged version (llvm.aarch64.mops.memset.tag) is legalized in legalizeIntrinsic
+ if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
+ // Widen the value operand to 64 bits with an any-extend
+ auto &Value = MI.getOperand(1);
+ Register ZExtValueReg =
+ MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+ Value.setReg(ZExtValueReg);
+ return true;
+ }
+
+ return false;
+}
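+
+// For example (a sketch; register names made up), a G_MEMSET whose value is
+// s8:
+//
+//   G_MEMSET %p(p0), %v(s8), %n(s64), 0
+//
+// is rewritten so the value operand becomes
+//
+//   %v64:_(s64) = G_ANYEXT %v(s8)
+//   G_MEMSET %p(p0), %v64(s64), %n(s64), 0
+//
+// which then matches the legalForCartesianProduct({p0}, {s64}, {s64}) rule
+// declared earlier.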
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index e2c46f4b4c1f..973f96ff4775 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -56,6 +56,7 @@ private:
bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 574b22124957..9d4bdbe5d053 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,8 +334,6 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
-def SDT_AArch64PFalse : SDTypeProfile<1, 0, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>]>;
-def AArch64pfalse : SDNode<"AArch64ISD::PFALSE", SDT_AArch64PFalse>;
let Predicates = [HasSVEorStreamingSVE] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
@@ -614,10 +612,10 @@ class sve_int_pfalse<bits<6> opc, string asm>
multiclass sve_int_pfalse<bits<6> opc, string asm> {
def NAME : sve_int_pfalse<opc, asm>;
- def : Pat<(nxv16i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv8i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv4i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv2i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv2i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
}
class sve_int_ptest<bits<6> opc, string asm>
@@ -773,7 +771,7 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm,
def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))),
(!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
- // Combine cntp with combine_op
+ // combine_op(x, cntp(all_active, p)) ==> inst p, x
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))),
(!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>;
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))),
@@ -782,6 +780,16 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm,
(!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))),
(!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
+
+ // combine_op(x, cntp(p, p)) ==> inst p, x
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _H) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
@@ -1633,15 +1641,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
-multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+// An instance of sve_int_pred_log that additionally matches op_nopred, using
+// op_nopred's first operand as the general predicate.
+multiclass sve_int_pred_log_v2<bits<4> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_nopred> :
sve_int_pred_log<opc, asm, op> {
- def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+ def : Pat<(nxv16i1 (op_nopred nxv16i1:$Op1, nxv16i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+ def : Pat<(nxv8i1 (op_nopred nxv8i1:$Op1, nxv8i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+ def : Pat<(nxv4i1 (op_nopred nxv4i1:$Op1, nxv4i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+ def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 958e8c9e5bc5..11cc1a01d248 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 7d6845b287bc..bebf032b5535 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -14,9 +14,12 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-annotate-uniform"
@@ -29,6 +32,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
LegacyDivergenceAnalysis *DA;
MemorySSA *MSSA;
+ AliasAnalysis *AA;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isEntryFunc;
@@ -44,6 +48,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesAll();
}
@@ -58,6 +63,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
@@ -70,9 +76,79 @@ static void setNoClobberMetadata(Instruction *I) {
I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}
-bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
- const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
- return !MSSA->isLiveOnEntryDef(MA);
+bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
+ MemorySSAWalker *Walker = MSSA->getWalker();
+ SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+ SmallSet<MemoryAccess *, 8> Visited;
+ MemoryLocation Loc(MemoryLocation::get(Load));
+
+ const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
+ Instruction *DefInst = Def->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Def: " << *DefInst << '\n');
+
+ if (isa<FenceInst>(DefInst))
+ return false;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_s_barrier:
+ case Intrinsic::amdgcn_wave_barrier:
+ return false;
+ default:
+ break;
+ }
+ }
+
+ // Ignore atomics that do not alias the original load; from MSSA's point
+ // of view any atomic is a universal MemoryDef too, just like a fence.
+ const auto checkNoAlias = [this, Load](auto I) -> bool {
+ return I && AA->isNoAlias(I->getPointerOperand(),
+ Load->getPointerOperand());
+ };
+
+ if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
+ checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
+ return false;
+
+ return true;
+ };
+
+ LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
+
+ // Start with the nearest dominating clobbering access: it will be either
+ // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
+ // a MemoryPhi if several MemoryDefs can define this memory state. In that
+ // case add all the Defs to the WorkList and keep walking up, checking all
+ // the definitions of this memory location until the root. If all the defs
+ // are exhausted and we have reached the entry state, there is no clobber.
+ // Along the scan, ignore barriers and fences, which MemorySSA considers
+ // clobbers even though they do not actually write anything into memory.
+ while (!WorkList.empty()) {
+ MemoryAccess *MA = WorkList.pop_back_val();
+ if (!Visited.insert(MA).second)
+ continue;
+
+ if (MSSA->isLiveOnEntryDef(MA))
+ continue;
+
+ if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
+ if (isReallyAClobber(Def)) {
+ LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
+ return true;
+ }
+
+ WorkList.push_back(
+ Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+ continue;
+ }
+
+ const MemoryPhi *Phi = cast<MemoryPhi>(MA);
+ for (auto &Use : Phi->incoming_values())
+ WorkList.push_back(cast<MemoryAccess>(&Use));
+ }
+
+ LLVM_DEBUG(dbgs() << " -> no clobber\n");
+ return false;
}
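
Concretely, these are the kinds of MemoryDefs the walk now looks through (illustrative IR; only the atomic requires an AA no-alias proof against the load's pointer):

    fence syncscope("workgroup") release        ; ignored: fences write nothing
    call void @llvm.amdgcn.s.barrier()          ; ignored: barrier intrinsic
    %old = atomicrmw add i32* %q, i32 1 seq_cst ; ignored if noalias with %p
    %v = load i32, i32* %p                      ; the load being checked
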
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
@@ -84,9 +160,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
- auto isGlobalLoad = [&](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
- };
// We're tracking up to the Function boundaries, and cannot go beyond because
// of FunctionPass restrictions. We can ensure that is memory not clobbered
// for memory operations that are live in to entry points only.
@@ -99,7 +172,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool NotClobbered = false;
- bool GlobalLoad = isGlobalLoad(I);
+ bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
if (PtrI)
NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
@@ -139,6 +212,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
DA = &getAnalysis<LegacyDivergenceAnalysis>();
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
visit(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index bb2e723f4ab0..6e2984f2a04f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -88,6 +88,8 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
} else if (isHsaAbiVersion3(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
+ } else if (isHsaAbiVersion5(getGlobalSTI())) {
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
} else {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
}
@@ -118,7 +120,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
- if (isHsaAbiVersion3Or4(getGlobalSTI()))
+ if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
@@ -127,7 +129,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (isHsaAbiVersion3Or4(getGlobalSTI()))
+ if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
return;
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -259,7 +261,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- isHsaAbiVersion3Or4(getGlobalSTI())) {
+ isHsaAbiVersion3AndAbove(getGlobalSTI())) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 3ac7c45b3275..f5018e3a19ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
-void MetadataStreamerV3::emitKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
msgpack::MapDocNode Kern) {
+ auto &Func = MF.getFunction();
unsigned Offset = 0;
auto Args = HSAMetadataDoc->getArrayNode();
for (auto &Arg : Func.args())
emitKernelArg(Arg, Offset, Args);
- emitHiddenKernelArgs(Func, ST, Offset, Args);
+ emitHiddenKernelArgs(MF, Offset, Args);
Kern[".args"] = Args;
}
@@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg(
Args.push_back(Arg);
}
-void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
@@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
auto Kern = getHSAKernelProps(MF, ProgramInfo);
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(Func, Kern);
- emitKernelArgs(Func, ST, Kern);
+ emitKernelArgs(MF, Kern);
}
Kernels.push_back(Kern);
@@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod,
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV5
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV5::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
+ getRootMetadata("amdhsa.version") = Version;
+}
+
+void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
+ unsigned &Offset,
+ msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
+
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+ auto Int32Ty = Type::getInt32Ty(Func.getContext());
+ auto Int16Ty = Type::getInt16Ty(Func.getContext());
+
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
+
+ // Reserved for hidden_tool_correlation_id.
+ Offset += 8;
+
+ Offset += 8; // Reserved.
+
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
+
+ Offset += 6; // Reserved.
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ if (M->getNamedMetadata("llvm.printf.fmts")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ if (M->getModuleFlag("amdgpu_hostcall")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ Args);
+
+ // Ignore temporarily until it is implemented.
+ // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
+ Offset += 8;
+
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
+ Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
+ Args);
+ } else
+ Offset += 16; // Skipped.
+
+ Offset += 72; // Reserved.
+
+ // hidden_private_base and hidden_shared_base are only used by GFX8.
+ if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
+ } else
+ Offset += 8; // Skipped.
+
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI.hasQueuePtr())
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
+}
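+
+// A sketch of the resulting v5 metadata for the first hidden arguments
+// (offsets follow the emission order above; exact field rendering depends on
+// the msgpack-to-YAML dump):
+//
+//   amdhsa.version: [ <VersionMajorV5>, <VersionMinorV5> ]
+//   ...
+//   - .offset:     0
+//     .size:       4
+//     .value_kind: hidden_block_count_x
+//   - .offset:     4
+//     .size:       4
+//     .value_kind: hidden_block_count_y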
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 54ed0afbba6d..bcf7fc449094 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -53,6 +53,11 @@ public:
virtual void emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) = 0;
+
+protected:
+ virtual void emitVersion() = 0;
+ virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) = 0;
};
// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3.
@@ -79,7 +84,7 @@ protected:
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
- void emitVersion();
+ void emitVersion() override;
void emitPrintf(const Module &Mod);
@@ -87,8 +92,7 @@ protected:
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelArgs(const Function &Func, const GCNSubtarget &ST,
- msgpack::MapDocNode Kern);
+ void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
void emitKernelArg(const Argument &Arg, unsigned &Offset,
msgpack::ArrayDocNode Args);
@@ -100,8 +104,8 @@ protected:
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");
- void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST,
- unsigned &Offset, msgpack::ArrayDocNode Args);
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override;
msgpack::DocNode &getRootMetadata(StringRef Key) {
return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
@@ -127,9 +131,9 @@ public:
};
// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4.
-class MetadataStreamerV4 final : public MetadataStreamerV3 {
- void emitVersion();
-
+class MetadataStreamerV4 : public MetadataStreamerV3 {
+protected:
+ void emitVersion() override;
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
public:
@@ -140,6 +144,18 @@ public:
const IsaInfo::AMDGPUTargetID &TargetID) override;
};
+// TODO: Rename MetadataStreamerV5 -> MetadataStreamerMsgPackV5.
+class MetadataStreamerV5 final : public MetadataStreamerV4 {
+protected:
+ void emitVersion() override;
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override;
+
+public:
+ MetadataStreamerV5() = default;
+ ~MetadataStreamerV5() = default;
+};
+
// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
class MetadataStreamerV2 final : public MetadataStreamer {
private:
@@ -167,8 +183,6 @@ private:
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
- void emitVersion();
-
void emitPrintf(const Module &Mod);
void emitKernelLanguage(const Function &Func);
@@ -191,6 +205,13 @@ private:
return HSAMetadata;
}
+protected:
+ void emitVersion() override;
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override {
+ llvm_unreachable("Dummy override should not be invoked!");
+ }
+
public:
MetadataStreamerV2() = default;
~MetadataStreamerV2() = default;
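
Distilled into a standalone sketch, the header change above gives the streamers this shape (names shortened, all other members elided; not the real classes): emitVersion and emitHiddenKernelArgs become pure virtual hooks on the base class, MetadataStreamerV4 loses its final so V5 can derive from it, and V5 overrides only the two hooks whose behavior changes. Shared driver code in V3 can then reach the v5 layout through the vtable without duplication.

  struct StreamerBase {                    // cf. MetadataStreamer
    virtual ~StreamerBase() = default;
  protected:
    virtual void emitVersion() = 0;
    virtual void emitHiddenKernelArgs(unsigned &Offset) = 0;
  };

  struct StreamerV3 : StreamerBase {       // msgpack, code object v3
  protected:
    void emitVersion() override {}                     // v3 version record
    void emitHiddenKernelArgs(unsigned &) override {}  // v3/v4 layout
  };

  struct StreamerV4 : StreamerV3 {         // no longer 'final'
  protected:
    void emitVersion() override {}                     // v4 version record
  };

  struct StreamerV5 final : StreamerV4 {
  protected:
    void emitVersion() override {}                     // v5 version record
    void emitHiddenKernelArgs(unsigned &) override {}  // v5 layout above
  };

  int main() { StreamerV5 S; (void)S; return 0; }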
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 04c6f67ed339..645d05aa9238 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4778,6 +4778,7 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return legalizeTrapHsaQueuePtr(MI, MRI, B);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return ST.supportsGetDoorbellID() ?
legalizeTrapHsa(MI, MRI, B) :
legalizeTrapHsaQueuePtr(MI, MRI, B);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index c28427758ac7..bbbadfdfd444 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -16,8 +16,9 @@
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 2d8126a49327..99b7ffb33884 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -13,15 +13,16 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-#include "Utils/AMDGPUBaseInfo.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c1c88d9a7462..ffe626513d47 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1129,7 +1129,8 @@ class KernelScopeInfo {
if (i >= SgprIndexUnusedMin) {
SgprIndexUnusedMin = ++i;
if (Ctx) {
- MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+ MCSymbol* const Sym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
}
}
@@ -1139,7 +1140,8 @@ class KernelScopeInfo {
if (i >= VgprIndexUnusedMin) {
VgprIndexUnusedMin = ++i;
if (Ctx) {
- MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ MCSymbol* const Sym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
}
}
@@ -1296,7 +1298,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1313,7 +1315,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -2747,7 +2749,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -5099,7 +5101,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3Or4(&getSTI())
+ isHsaAbiVersion3AndAbove(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
@@ -5116,7 +5118,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
HSAMetadataString))
return true;
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
@@ -5266,7 +5268,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
@@ -7440,7 +7442,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
if (!getTargetStreamer().getTargetID())
getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
- if (isHsaAbiVersion3Or4(&getSTI()))
+ if (isHsaAbiVersion3AndAbove(&getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9578bdb0bad0..7aa5f1abf65b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -396,6 +396,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
if (getTargetID()->isXnackSupported())
OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
break;
@@ -578,6 +579,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return getEFlagsV4();
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 561866b5a398..e2f4a0896bc3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5423,6 +5423,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return lowerTrapHsaQueuePtr(Op, DAG);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return Subtarget->supportsGetDoorbellID() ?
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c18637bdbc43..44bdbe37dec0 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -938,12 +938,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
// 2. It is safe to move MBBI down past the instruction that I will
// be merged into.
- if (MBBI->hasUnmodeledSideEffects()) {
- // We can't re-order this instruction with respect to other memory
- // operations, so we fail both conditions mentioned above.
- return false;
- }
-
if (MBBI->mayLoadOrStore() &&
(!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
!canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) {
@@ -1977,10 +1971,10 @@ SILoadStoreOptimizer::collectMergeableInsts(
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
Modified = true;
- // Don't combine if volatile. We also won't be able to merge across this, so
- // break the search. We can look after this barrier for separate merges.
- if (MI.hasOrderedMemoryRef()) {
- LLVM_DEBUG(dbgs() << "Breaking search on memory fence: " << MI);
+    // Treat volatile accesses, ordered accesses and unmodeled side effects as
+    // barriers; the search can resume past such a barrier for separate merges.
+ if (MI.hasOrderedMemoryRef() || MI.hasUnmodeledSideEffects()) {
+ LLVM_DEBUG(dbgs() << "Breaking search on barrier: " << MI);
// Search will resume after this instruction in a separate merge list.
++BlockI;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1e96266eb06c..683be871ff82 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -99,6 +99,8 @@ Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
+ case 5:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
default:
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
Twine(AmdhsaCodeObjectVersion));
@@ -123,8 +125,15 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
return false;
}
-bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
- return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
+bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
+ return false;
+}
+
+bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
+ return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
+ isHsaAbiVersion5(STI);
}
#define GET_MIMGBaseOpcodesTable_IMPL
@@ -495,6 +504,7 @@ std::string AMDGPUTargetID::toString() const {
Features += "+sram-ecc";
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 89f928eb8b92..4516b511f3c8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -47,9 +47,12 @@ bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 5,
+/// false otherwise.
+bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
-bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
+bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
struct GcnBufferFormatInfo {
unsigned Format;
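
A small self-contained model of what the rename buys: every call site that previously spelled out "3 or 4" now expresses "3 and above", so adding v5 (and any later version) is a change to the predicate, not to its callers. The function below is a stand-in for illustration, not the LLVM API; the msgpack-vs-YAML split matches the streamer classes above.

  #include <cstdio>

  // Stand-in for isHsaAbiVersion3AndAbove: v3+ share the msgpack
  // metadata/directive path, v2 keeps the YAML path.
  static bool isHsaAbiVersion3AndAbove(int CodeObjectVersion) {
    return CodeObjectVersion >= 3;
  }

  int main() {
    for (int V = 2; V <= 5; ++V)
      std::printf("code object v%d -> %s\n", V,
                  isHsaAbiVersion3AndAbove(V) ? "msgpack" : "YAML");
    return 0;
  }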
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 4efbdbb2abc8..27edf69b4abf 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -656,6 +656,8 @@ def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily",
"CortexA710", "Cortex-A710 ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
+def ProcX1C : SubtargetFeature<"cortex-x1c", "ARMProcFamily", "CortexX1C",
+ "Cortex-X1C ARM processors", []>;
def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily",
"NeoverseV1", "Neoverse-V1 ARM processors", []>;
@@ -1443,6 +1445,14 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"cortex-x1c", [ARMv82a, ProcX1C,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
def : ProcNoItin<"neoverse-v1", [ARMv84a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index cde715880376..5b0bae4d9274 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -752,23 +752,17 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.getSize())
- return MCID.getSize();
switch (MI.getOpcode()) {
default:
- // pseudo-instruction sizes are zero.
- return 0;
+    // Return the size specified in the .td file. If there is none, return 0:
+    // we can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
+    // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
+    // contrast to AArch64, whose instructions have a default size of 4 bytes.
+ return MCID.getSize();
case TargetOpcode::BUNDLE:
return getInstBundleLength(MI);
- case ARM::MOVi16_ga_pcrel:
- case ARM::MOVTi16_ga_pcrel:
- case ARM::t2MOVi16_ga_pcrel:
- case ARM::t2MOVTi16_ga_pcrel:
- return 4;
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm:
- return 8;
case ARM::CONSTPOOL_ENTRY:
case ARM::JUMPTABLE_INSTS:
case ARM::JUMPTABLE_ADDRS:
@@ -777,19 +771,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI.getOperand(2).getImm();
- case ARM::Int_eh_sjlj_longjmp:
- return 16;
- case ARM::tInt_eh_sjlj_longjmp:
- return 10;
- case ARM::tInt_WIN_eh_sjlj_longjmp:
- return 12;
- case ARM::Int_eh_sjlj_setjmp:
- case ARM::Int_eh_sjlj_setjmp_nofp:
- return 20;
- case ARM::tInt_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 12;
case ARM::SPACE:
return MI.getOperand(1).getImm();
case ARM::INLINEASM:
@@ -800,14 +781,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
Size = alignTo(Size, 4);
return Size;
}
- case ARM::SpeculationBarrierISBDSBEndBB:
- case ARM::t2SpeculationBarrierISBDSBEndBB:
- // This gets lowered to 2 4-byte instructions.
- return 8;
- case ARM::SpeculationBarrierSBEndBB:
- case ARM::t2SpeculationBarrierSBEndBB:
- // This gets lowered to 1 4-byte instructions.
- return 4;
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fe4e6b24367a..1b41427a1cab 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14527,7 +14527,7 @@ static SDValue PerformXORCombine(SDNode *N,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
const TargetLowering *TLI = Subtarget->getTargetLowering();
- if (TLI->isConstTrueVal(N1.getNode()) &&
+ if (TLI->isConstTrueVal(N1) &&
(N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
if (CanInvertMVEVCMP(N0)) {
SDLoc DL(N0);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 1c1db473f866..32a3911d3369 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -3657,6 +3657,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
(MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>,
Requires<[IsARM, HasV6T2]>;
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -3680,6 +3682,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
let DecoderMethod = "DecodeArmMOVTWInstruction";
}
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -5895,27 +5899,30 @@ def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>,
//
// These are pseudo-instructions and are lowered to individual MC-insts, so
// no encoding information is necessary.
+// This gets lowered to an instruction sequence of 20 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
- hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in {
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
+// This gets lowered to an instruction sequence of 20 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in {
def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
+// This gets lowered to an instruction sequence of 16 bytes
// FIXME: Non-IOS version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Size = 16,
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
@@ -5958,7 +5965,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, Size = 8 in
def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
@@ -6419,8 +6426,12 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions
+ let Size = 8 in
def SpeculationBarrierISBDSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  // This gets lowered to a single 4-byte instruction
+ let Size = 4 in
def SpeculationBarrierSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index f09ad8167600..71527ae1ab11 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1537,25 +1537,28 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// Defs. By doing so, we also cause the prologue/epilogue code to actively
// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
+// This gets lowered to an instruction sequence of 12 bytes
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+// This gets lowered to an instruction sequence of 10 bytes
// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
- Defs = [ R7, LR, SP ] in
+ Size = 10, Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins tGPR:$src, tGPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp tGPR:$src, tGPR:$scratch)]>,
Requires<[IsThumb,IsNotWindows]>;
+// This gets lowered to an instruction sequence of 12 bytes
// (Windows is Thumb2-only)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
- Defs = [ R11, LR, SP ] in
+ Size = 12, Defs = [ R11, LR, SP ] in
def tInt_WIN_eh_sjlj_longjmp
: XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 6e8e61ca2b8e..f80b9a5053f7 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2194,6 +2194,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>;
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -2223,6 +2225,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>, Requires<[IsThumb, HasV8MBaseline]>;
@@ -3814,10 +3818,11 @@ def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
+// This gets lowered to an instruction sequence of 12 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
@@ -3825,9 +3830,10 @@ let Defs =
Requires<[IsThumb2, HasVFP2]>;
}
+// This gets lowered to an instruction sequence of 12 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
@@ -4224,7 +4230,7 @@ def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, Size = 8 in
def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set rGPR:$dst, (i32 imm:$src))]>,
Requires<[IsThumb, UseMovt]>;
@@ -5006,8 +5012,12 @@ def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions
+ let Size = 8 in
def t2SpeculationBarrierISBDSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  // This gets lowered to a single 4-byte instruction
+ let Size = 4 in
def t2SpeculationBarrierSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 2dd25234dc50..32160b109343 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -304,6 +304,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM7:
case CortexR52:
case CortexX1:
+ case CortexX1C:
break;
case Exynos:
LdStMultipleTiming = SingleIssuePlusExtras;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 1c2b7ee6ba35..7cbdc014299f 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -77,6 +77,7 @@ protected:
CortexR52,
CortexR7,
CortexX1,
+ CortexX1C,
Exynos,
Krait,
Kryo,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e0750a9945d2..d9d563ead260 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2109,9 +2109,6 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
}
Type *T = I.getType();
- if (T->isPointerTy())
- T = T->getPointerElementType();
-
if (T->getScalarSizeInBits() > 32) {
LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index ea6a7498e27f..311e43d77210 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -313,12 +313,18 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
return false;
}
+ // If the register is undefined (for example if it's a reserved register),
+ // it may still be possible to extend the range, but it's safer to be
+ // conservative and just punt.
+ if (LRExtRegRD == 0)
+ return false;
+
MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode();
NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD);
// Reaching Def to LRExtReg can't be a phi.
if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
MI->getParent() != UseMI->getParent())
- return false;
+ return false;
}
return true;
}
diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
index 860c0ce29326..79e9ad4dd1d2 100644
--- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
@@ -21,13 +21,32 @@ using namespace llvm;
M68kLegalizerInfo::M68kLegalizerInfo(const M68kSubtarget &ST) {
using namespace TargetOpcode;
- const LLT S32 = LLT::scalar(32);
- const LLT P0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_LOAD).legalFor({S32});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({P0});
- getActionDefinitionsBuilder(G_ADD).legalFor({S32});
- getActionDefinitionsBuilder(G_SUB).legalFor({S32});
- getActionDefinitionsBuilder(G_MUL).legalFor({S32});
- getActionDefinitionsBuilder(G_UDIV).legalFor({S32});
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT p0 = LLT::pointer(0, 32);
+
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UDIV, G_AND})
+ .legalFor({s8, s16, s32})
+ .clampScalar(0, s8, s32)
+ .widenScalarToNextPow2(0, 8);
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});
+
+ getActionDefinitionsBuilder({G_STORE, G_LOAD})
+ .legalForTypesWithMemDesc({{s32, p0, s32, 4},
+ {s32, p0, s16, 4},
+ {s32, p0, s8, 4},
+ {s16, p0, s16, 2},
+ {s8, p0, s8, 1},
+ {p0, p0, s32, 4}})
+ .clampScalar(0, s8, s32);
+
+ getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s32}});
+
getLegacyLegalizerInfo().computeTables();
}
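
The clamp-and-widen rules above read as plain arithmetic on the scalar bit width. A sketch of the net effect on type index 0, assuming the usual GlobalISel semantics of clampScalar and widenScalarToNextPow2 (and ignoring that clampScalar narrows over-wide values by splitting rather than truncation):

  #include <cassert>

  // s1..s8 -> s8, s9..s16 -> s16, s17..s32 -> s32, wider -> s32 pieces.
  static unsigned legalizedBits(unsigned Bits) {
    if (Bits < 8)  Bits = 8;            // clampScalar(0, s8, s32), low bound
    if (Bits > 32) Bits = 32;           // clampScalar(0, s8, s32), high bound
    unsigned W = 8;                     // widenScalarToNextPow2(0, /*Min=*/8)
    while (W < Bits) W *= 2;            // next power of two >= Bits
    return W;
  }

  int main() {
    assert(legalizedBits(1) == 8);      // e.g. a G_ADD of s1
    assert(legalizedBits(12) == 16);
    assert(legalizedBits(24) == 32);
    assert(legalizedBits(64) == 32);    // split into s32 operations
    return 0;
  }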
diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td
index d610bce5c277..0d1278102378 100644
--- a/llvm/lib/Target/M68k/M68kInstrBits.td
+++ b/llvm/lib/Target/M68k/M68kInstrBits.td
@@ -79,6 +79,10 @@ def BTST32di : MxBTST_RI<MxType32d>;
// Memory BTST limited to 8 bits only
def BTST8jd : MxBTST_MR<MxType8d, MxType8.JOp, MxType8.JPat,
MxEncEAj_0, MxExtEmpty>;
+def BTST8od : MxBTST_MR<MxType8d, MxType8.OOp, MxType8.OPat,
+ MxEncEAo_0, MxExtEmpty>;
+def BTST8ed : MxBTST_MR<MxType8d, MxType8.EOp, MxType8.EPat,
+ MxEncEAe_0, MxExtEmpty>;
def BTST8pd : MxBTST_MR<MxType8d, MxType8.POp, MxType8.PPat,
MxEncEAp_0, MxExtI16_0>;
def BTST8fd : MxBTST_MR<MxType8d, MxType8.FOp, MxType8.FPat,
@@ -90,6 +94,10 @@ def BTST8kd : MxBTST_MR<MxType8d, MxType8.KOp, MxType8.KPat,
def BTST8ji : MxBTST_MI<MxType8d, MxType8.JOp, MxType8.JPat,
MxEncEAj_0, MxExtEmpty>;
+def BTST8oi : MxBTST_MI<MxType8d, MxType8.OOp, MxType8.OPat,
+ MxEncEAo_0, MxExtEmpty>;
+def BTST8ei : MxBTST_MI<MxType8d, MxType8.EOp, MxType8.EPat,
+ MxEncEAe_0, MxExtEmpty>;
def BTST8pi : MxBTST_MI<MxType8d, MxType8.POp, MxType8.PPat,
MxEncEAp_0, MxExtI16_0>;
def BTST8fi : MxBTST_MI<MxType8d, MxType8.FOp, MxType8.FPat,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index eac237bb27bb..7b5248906b56 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -574,7 +574,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
- setOperationAction(Op, MVT::f64, GetMinMaxAction(Expand));
setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 22e200e77831..22084cddc092 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -896,6 +896,7 @@ defm FMUL : F3_fma_component<"mul", fmul>;
defm FMIN : F3<"min", fminnum>;
defm FMAX : F3<"max", fmaxnum>;
+// Note: min.NaN.f64 and max.NaN.f64 do not actually exist.
defm FMINNAN : F3<"min.NaN", fminimum>;
defm FMAXNAN : F3<"max.NaN", fmaximum>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 25cc34badda0..cbeae0ab03b8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1252,7 +1252,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
@@ -9093,22 +9092,30 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
unsigned &Opcode) {
- const SDNode *InputNode = Op.getOperand(0).getNode();
- if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode))
- return false;
-
- if (!Subtarget.hasVSX())
+ LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
+ if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
return false;
EVT Ty = Op->getValueType(0);
- if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 ||
- Ty == MVT::v8i16 || Ty == MVT::v16i8)
+ // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
+ // as we cannot handle extending loads for these types.
+ if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
+ ISD::isNON_EXTLoad(InputNode))
+ return true;
+
+ EVT MemVT = InputNode->getMemoryVT();
+ // For v8i16 and v16i8 types, extending loads can be handled as long as the
+ // memory VT is the same vector element VT type.
+ // The loads feeding into the v8i16 and v16i8 types will be extending because
+ // scalar i8/i16 are not legal types.
+ if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
+ (MemVT == Ty.getVectorElementType()))
return true;
if (Ty == MVT::v2i64) {
// Check the extend type, when the input type is i32, and the output vector
// type is v2i64.
- if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
+ if (MemVT == MVT::i32) {
if (ISD::isZEXTLoad(InputNode))
Opcode = PPCISD::ZEXT_LD_SPLAT;
if (ISD::isSEXTLoad(InputNode))
@@ -10755,6 +10762,26 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (VT == MVT::v2f64 && C)
return Op;
+ if (Subtarget.hasP9Vector()) {
+ // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
+ // because on P10, it allows this specific insert_vector_elt load pattern to
+ // utilize the refactored load and store infrastructure in order to exploit
+ // prefixed loads.
+ // On targets with inexpensive direct moves (Power9 and up), a
+ // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
+ // load since a single precision load will involve conversion to double
+ // precision on the load followed by another conversion to single precision.
+ if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
+ (isa<LoadSDNode>(V2))) {
+ SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
+ SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
+ SDValue InsVecElt =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
+ BitcastLoad, Op.getOperand(2));
+ return DAG.getBitcast(MVT::v4f32, InsVecElt);
+ }
+ }
+
if (Subtarget.isISA3_1()) {
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index fe354208533b..ff43426dd1ef 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2816,32 +2816,20 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
let AddedComplexity = 400 in {
// Immediate vector insert element
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
@@ -2860,12 +2848,6 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
(VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
- (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
- (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
- def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)),
- (VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
@@ -2881,20 +2863,14 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
}
@@ -2904,15 +2880,6 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), $rA)>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
}
}
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index a2ea34fe11c7..01f36e6dcdd2 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2266,8 +2266,8 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
- } else if (Inst.Opc == RISCV::ADDUW) {
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW)
+ } else if (Inst.Opc == RISCV::ADD_UW) {
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW)
.addReg(DestReg)
.addReg(SrcReg)
.addReg(RISCV::X0));
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 14d0191a505f..1078403a3fd2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -197,9 +197,9 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Get byte count of instruction.
unsigned Size = Desc.getSize();
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
if (MI.getOpcode() == RISCV::PseudoCALLReg ||
MI.getOpcode() == RISCV::PseudoCALL ||
MI.getOpcode() == RISCV::PseudoTAIL ||
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 18858209aa9b..e935179e5f9b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -31,7 +31,7 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
case RISCV::LUI:
Compressed = isInt<6>(Instr.Imm);
break;
- case RISCV::ADDUW:
+ case RISCV::ADD_UW:
Compressed = false;
break;
}
@@ -123,10 +123,11 @@ static void generateInstSeqImpl(int64_t Val,
}
}
- // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
+ // Try to use SLLI_UW for Hi52 when it is uint32 but not int32.
if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
+ // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
+ // SLLI_UW.
Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
Unsigned = true;
}
@@ -134,7 +135,7 @@ static void generateInstSeqImpl(int64_t Val,
generateInstSeqImpl(Hi52, ActiveFeatures, Res);
if (Unsigned)
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
else
Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
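
The uint32-but-not-int32 trick above is easy to check with plain C++. Take Hi52 = 0x80001234: LUI+ADDI on RV64 sign-extend, so the closest value they can materialize is the ones-extended 0xffffffff80001234, which is exactly what the Hi52 | (0xffffffff << 32) adjustment requests; slli.uw then zero-extends the low 32 bits before shifting, discarding those ones again. A sketch of the instruction semantics, not the LLVM code:

  #include <cassert>
  #include <cstdint>

  // slli.uw rd, rs1, shamt: zero-extend rs1's low 32 bits, then shift left.
  static uint64_t slli_uw(uint64_t rs1, unsigned shamt) {
    return (rs1 & 0xffffffffull) << shamt;
  }

  int main() {
    uint64_t Hi52 = 0x80001234;                          // uint32, not int32
    uint64_t FromLuiAddi = Hi52 | (0xffffffffull << 32); // what LUI+ADDI build
    assert(slli_uw(FromLuiAddi, 12) == (Hi52 << 12));    // upper ones vanish
    return 0;
  }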
@@ -210,7 +211,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
TmpSeq.clear();
generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
- TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0));
// Keep the new sequence if it is an improvement.
if (TmpSeq.size() < Res.size()) {
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 5b0f27c5e937..e32a8fb010de 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -52,11 +52,17 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
def FeatureStdExtZfh
: SubtargetFeature<"zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
- [FeatureStdExtZfhmin, FeatureStdExtF]>;
+ [FeatureStdExtF]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
AssemblerPredicate<(all_of FeatureStdExtZfh),
"'Zfh' (Half-Precision Floating-Point)">;
+def HasStdExtZfhOrZfhmin
+ : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZfhmin()">,
+ AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin),
+ "'Zfh' (Half-Precision Floating-Point) or "
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)">;
+
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 26ce16486bd9..40ee7ca6bc1c 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -86,9 +86,9 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
switch (MBBI->getOpcode()) {
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 80340ee81509..0c5c13db7112 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -92,9 +92,9 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
switch (MBBI->getOpcode()) {
case RISCV::PseudoLLA:
return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5870502d74d5..6f77428ae721 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -166,8 +166,8 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
if (Inst.Opc == RISCV::LUI)
Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
- else if (Inst.Opc == RISCV::ADDUW)
- Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
+ else if (Inst.Opc == RISCV::ADD_UW)
+ Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
CurDAG->getRegister(RISCV::X0, XLenVT));
else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
Inst.Opc == RISCV::SH3ADD)
@@ -775,10 +775,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
// Use slli.uw when possible.
if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
- SDNode *SLLIUW =
- CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
+ SDNode *SLLI_UW =
+ CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
CurDAG->getTargetConstant(C2, DL, XLenVT));
- ReplaceNode(Node, SLLIUW);
+ ReplaceNode(Node, SLLI_UW);
return;
}
@@ -1811,7 +1811,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
case RISCV::CLZW:
case RISCV::CTZW:
case RISCV::CPOPW:
- case RISCV::SLLIUW:
+ case RISCV::SLLI_UW:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_WU:
case RISCV::FCVT_S_W:
@@ -1830,20 +1830,20 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
return false;
break;
- case RISCV::SEXTB:
+ case RISCV::SEXT_B:
if (Bits < 8)
return false;
break;
- case RISCV::SEXTH:
- case RISCV::ZEXTH_RV32:
- case RISCV::ZEXTH_RV64:
+ case RISCV::SEXT_H:
+ case RISCV::ZEXT_H_RV32:
+ case RISCV::ZEXT_H_RV64:
if (Bits < 16)
return false;
break;
- case RISCV::ADDUW:
- case RISCV::SH1ADDUW:
- case RISCV::SH2ADDUW:
- case RISCV::SH3ADDUW:
+ case RISCV::ADD_UW:
+ case RISCV::SH1ADD_UW:
+ case RISCV::SH2ADD_UW:
+ case RISCV::SH3ADD_UW:
// The first operand to add.uw/shXadd.uw is implicitly zero extended from
// 32 bits.
if (UI.getOperandNo() != 0 || Bits < 32)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5cc3aa35d4d2..97d24c8e9c0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -282,6 +282,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
? Legal
: Expand);
+ // Zbkb can use rev8+brev8 to implement bitreverse.
+ setOperationAction(ISD::BITREVERSE, XLenVT,
+ Subtarget.hasStdExtZbkb() ? Custom : Expand);
}
if (Subtarget.hasStdExtZbb()) {
@@ -1082,6 +1085,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::STORE);
}
+
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -1115,17 +1121,15 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
- case Intrinsic::riscv_masked_cmpxchg_i32: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ case Intrinsic::riscv_masked_cmpxchg_i32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
- }
case Intrinsic::riscv_masked_strided_load:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(1);
@@ -2952,17 +2956,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
- // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
- assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- // Start with the maximum immediate value which is the bitwidth - 1.
- unsigned Imm = VT.getSizeInBits() - 1;
- // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
- if (Op.getOpcode() == ISD::BSWAP)
- Imm &= ~0x7U;
- return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
- DAG.getConstant(Imm, DL, VT));
+ if (Subtarget.hasStdExtZbp()) {
+      // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+ // Start with the maximum immediate value which is the bitwidth - 1.
+ unsigned Imm = VT.getSizeInBits() - 1;
+ // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+ if (Op.getOpcode() == ISD::BSWAP)
+ Imm &= ~0x7U;
+ return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Imm, DL, VT));
+ }
+ assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
+ assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
+ // Expand bitreverse to a bswap(rev8) followed by brev8.
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
+ // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
+ // as brev8 by an isel pattern.
+ return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
+ DAG.getConstant(7, DL, VT));
}
case ISD::FSHL:
case ISD::FSHR: {
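
The Zbkb expansion above leans on an identity worth spelling out: byte-reversing a value (rev8, i.e. bswap) and then bit-reversing each byte (brev8, the grevi-7 encoding) reverses all of its bits. A standalone check in plain C++ (hand-rolled instruction models plus the GCC/Clang __builtin_bswap32; none of this is LLVM code):

  #include <cassert>
  #include <cstdint>

  static uint8_t brevByte(uint8_t b) {          // reverse bits within a byte
    b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
    b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
    return (b & 0xAA) >> 1 | (b & 0x55) << 1;
  }

  static uint32_t brev8(uint32_t x) {           // model of Zbkb brev8
    uint32_t r = 0;
    for (int i = 0; i < 4; ++i)
      r |= uint32_t(brevByte(uint8_t(x >> (8 * i)))) << (8 * i);
    return r;
  }

  static uint32_t bitreverse(uint32_t x) {      // reference: reverse all bits
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i)
      r |= ((x >> i) & 1u) << (31 - i);
    return r;
  }

  int main() {
    const uint32_t Tests[] = {0x12345678u, 0u, 0xdeadbeefu};
    for (uint32_t x : Tests)
      assert(brev8(__builtin_bswap32(x)) == bitreverse(x));
    return 0;
  }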
@@ -3063,6 +3076,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
+ if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
+ report_fatal_error("Support for VLEN==32 is incomplete.");
if (isa<ConstantSDNode>(Op.getOperand(0))) {
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
@@ -4288,8 +4303,47 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
if (VecVT.getVectorElementType() == MVT::i1) {
- // FIXME: For now we just promote to an i8 vector and extract from that,
- // but this is probably not optimal.
+ if (VecVT.isFixedLengthVector()) {
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (NumElts >= 8) {
+ MVT WideEltVT;
+ unsigned WidenVecLen;
+ SDValue ExtractElementIdx;
+ SDValue ExtractBitIdx;
+ unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
+ MVT LargestEltVT = MVT::getIntegerVT(
+ std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
+ if (NumElts <= LargestEltVT.getSizeInBits()) {
+ assert(isPowerOf2_32(NumElts) &&
+ "the number of elements should be power of 2");
+ WideEltVT = MVT::getIntegerVT(NumElts);
+ WidenVecLen = 1;
+ ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
+ ExtractBitIdx = Idx;
+ } else {
+ WideEltVT = LargestEltVT;
+ WidenVecLen = NumElts / WideEltVT.getSizeInBits();
+ // extract element index = index / element width
+ ExtractElementIdx = DAG.getNode(
+ ISD::SRL, DL, XLenVT, Idx,
+ DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
+ // mask bit index = index % element width
+ ExtractBitIdx = DAG.getNode(
+ ISD::AND, DL, XLenVT, Idx,
+ DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
+ }
+ MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
+ Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
+ SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
+ Vec, ExtractElementIdx);
+ // Extract the bit from GPR.
+ SDValue ShiftRight =
+ DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
+ return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+ DAG.getConstant(1, DL, XLenVT));
+ }
+ }
+ // Otherwise, promote to an i8 vector and extract from that.
MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
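
A numeric instance of the index arithmetic in the i1 lowering above, with WideEltVT = i8 chosen for illustration: extracting bit 13 of a fixed v32i1 mask bitcast to v4i8 reads lane 13 / 8 = 1 and then tests bit 13 % 8 = 5 of it, and both operations reduce to the SRL and AND the code emits.

  #include <cassert>

  int main() {
    unsigned EltBits = 8;                   // WideEltVT = i8
    unsigned Idx = 13;                      // mask bit to extract
    unsigned ElementIdx = Idx >> 3;         // Idx / EltBits: SRL by Log2(8)
    unsigned BitIdx = Idx & (EltBits - 1);  // Idx % EltBits: AND with 7
    assert(ElementIdx == 1 && BitIdx == 5);
    // The extracted value is (Lane >> BitIdx) & 1, the SRL+AND pair above.
    return 0;
  }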
@@ -4411,15 +4465,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(RISCV::X4, PtrVT);
}
case Intrinsic::riscv_orc_b:
- // Lower to the GORCI encoding for orc.b.
- return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
+ case Intrinsic::riscv_brev8: {
+ // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
DAG.getConstant(7, DL, XLenVT));
+ }
case Intrinsic::riscv_grev:
case Intrinsic::riscv_gorc: {
unsigned Opc =
IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::riscv_zip:
+ case Intrinsic::riscv_unzip: {
+ // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
+    // For i32 the immediate is 15. For i64 the immediate is 31.
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
+ unsigned BitWidth = Op.getValueSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
+ DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
+ }
case Intrinsic::riscv_shfl:
case Intrinsic::riscv_unshfl: {
unsigned Opc =
@@ -5829,14 +5898,17 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
}
}
- if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
- }
-
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
+ VL);
+ Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
+ TrueMask, VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
@@ -5937,14 +6009,17 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
}
}
- if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
- }
-
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
+ VL);
+ Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
+ TrueMask, VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
@@ -6568,7 +6643,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
unsigned Opc =
IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
- if (isa<ConstantSDNode>(N->getOperand(2))) {
+ // There is no (UN)SHFLIW. If the control word is a constant, we can use
+ // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half
+ // will be shuffled the same way as the lower 32 bit half, but the two
+ // halves won't cross.
+ if (isa<ConstantSDNode>(NewOp2)) {
NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
DAG.getConstant(0xf, DL, MVT::i64));
Opc =
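Why masking the control word with 0xf is safe: with bit 4 clear, no shuffle stage crosses the 32-bit boundary, so both halves are permuted identically and independently. A reference-style model of RV64 shfl, adapted from the draft Bitmanip pseudocode (an assumption, not part of this patch):

    #include <cassert>
    #include <cstdint>

    static uint64_t stage(uint64_t Src, uint64_t MaskL, uint64_t MaskR, int N) {
      uint64_t X = Src & ~(MaskL | MaskR);
      return X | ((Src << N) & MaskL) | ((Src >> N) & MaskR);
    }

    // Stage 16 is the only one whose masks span the 32-bit halves.
    static uint64_t shfl64(uint64_t X, unsigned Ctrl) {
      if (Ctrl & 16) X = stage(X, 0x0000ffff00000000, 0x00000000ffff0000, 16);
      if (Ctrl & 8)  X = stage(X, 0x00ff000000ff0000, 0x0000ff000000ff00, 8);
      if (Ctrl & 4)  X = stage(X, 0x0f000f000f000f00, 0x00f000f000f000f0, 4);
      if (Ctrl & 2)  X = stage(X, 0x3030303030303030, 0x0c0c0c0c0c0c0c0c, 2);
      if (Ctrl & 1)  X = stage(X, 0x4444444444444444, 0x2222222222222222, 1);
      return X;
    }

    int main() {
      uint64_t Hi = 0xdeadbeef00000000, Lo = 0x0123cdef;
      for (unsigned Ctrl = 0; Ctrl < 16; ++Ctrl) // bit 4 clear
        assert((shfl64(Hi | Lo, Ctrl) >> 32) == (shfl64(Hi, Ctrl) >> 32));
      return 0;
    }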
@@ -7284,8 +7363,8 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
return SDValue(N, 0);
}
-// Try to form VWMUL or VWMULU.
-// FIXME: Support VWMULSU.
+// Try to form VWMUL, VWMULU or VWMULSU.
+// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool Commute) {
assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
@@ -7296,6 +7375,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
+ bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
return SDValue();
@@ -7316,7 +7396,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
// See if the other operand is the same opcode.
- if (Op0.getOpcode() == Op1.getOpcode()) {
+ if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
if (!Op1.hasOneUse())
return SDValue();
@@ -7366,7 +7446,9 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
if (Op1.getValueType() != NarrowVT)
Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
- unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ unsigned WMulOpc = RISCVISD::VWMULSU_VL;
+ if (!IsVWMULSU)
+ WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
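A per-element scalar model of what the new VWMULSU_VL computes once the combine fires, assuming SEW=16 widening to 32 bits (names are illustrative):

    #include <cassert>
    #include <cstdint>

    // vwmulsu: sign-extend one operand, zero-extend the other, multiply wide.
    static int32_t vwmulsuElt(int16_t A, uint16_t B) {
      return static_cast<int32_t>(A) * static_cast<int32_t>(B);
    }

    int main() {
      // Treating both operands as signed or both as unsigned would give a
      // different answer; the mixed extension yields -2 * 65535 = -131070.
      assert(vwmulsuElt(-2, 65535) == -131070);
      return 0;
    }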
@@ -8194,12 +8276,17 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
- case RISCVISD::READ_VLENB:
- // We assume VLENB is at least 16 bytes.
- Known.Zero.setLowBits(4);
+ case RISCVISD::READ_VLENB: {
+ // If we know the minimum VLen from Zvl extensions, we can use that to
+ // determine the trailing zeros of VLENB.
+ // FIXME: Limit to 128 bit vectors until we have more testing.
+ unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
+ if (MinVLenB > 0)
+ Known.Zero.setLowBits(Log2_32(MinVLenB));
// We assume VLENB is no more than 65536 / 8 bytes.
Known.Zero.setBitsFrom(14);
break;
+ }
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo =
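A worked instance of the known-bits reasoning above, taking Zvl128b as the assumed minimum: VLENB = VLEN/8 is then a multiple of 16, fixing four trailing zeros, while the 65536-bit architectural ceiling keeps bits 14 and up clear.

    #include <algorithm>
    #include <cassert>

    static unsigned floorLog2(unsigned V) { // stand-in for llvm::Log2_32
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      unsigned MinVLen = 128;                          // Zvl128b (assumed)
      unsigned MinVLenB = std::min(128u, MinVLen) / 8; // 16, after the FIXME cap
      assert(floorLog2(MinVLenB) == 4);                // low 4 bits of VLENB known 0
      assert(65536 / 8 < (1 << 14));                   // bits >= 14 known 0
      return 0;
    }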
@@ -8230,9 +8317,11 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
default:
break;
case RISCVISD::SELECT_CC: {
- unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
if (Tmp == 1) return 1; // Early out.
- unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
+ unsigned Tmp2 =
+ DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
case RISCVISD::SLLW:
@@ -8275,15 +8364,18 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
}
break;
}
- case RISCVISD::VMV_X_S:
+ case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
// element type of the input vector operand, subtracting its width from the
// XLEN, and then adding one (sign bit within the element type). If the
// element type is wider than XLen, the least-significant XLEN bits are
// taken.
- if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
- return 1;
- return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
+ unsigned XLen = Subtarget.getXLen();
+ unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
+ if (EltBits <= XLen)
+ return XLen - EltBits + 1;
+ break;
+ }
}
return 1;
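A worked instance of the rule above (illustrative helper, not the DAG code): an i16 element moved to a 64-bit GPR arrives sign-extended, so 64 - 16 + 1 = 49 bits are copies of the sign bit.

    #include <cassert>

    static unsigned vmvXSSignBits(unsigned XLen, unsigned EltBits) {
      return EltBits <= XLen ? XLen - EltBits + 1 // sign bit plus extension
                             : 1;                 // wider element: low XLEN bits only
    }

    int main() {
      assert(vmvXSSignBits(64, 16) == 49);
      assert(vmvXSSignBits(32, 64) == 1);
      return 0;
    }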
@@ -10129,6 +10221,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FP_ROUND_VL)
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
+ NODE_NAME_CASE(VWMULSU_VL)
NODE_NAME_CASE(VWADDU_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 58b7ec89f875..840a821870a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ enum NodeType : unsigned {
// Widening instructions
VWMUL_VL,
VWMULU_VL,
+ VWMULSU_VL,
VWADDU_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index d39e0805a79c..649eb57b325b 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -999,6 +999,12 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
+ // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was
+ // calculated in computeIncomingVLVTYPE. We need this to apply
+ // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We
+ // can't include predecessor information in that decision to avoid disagreeing
+ // with the global analysis.
+ VSETVLIInfo BBLocalInfo;
// Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
MachineInstr *PrevVSETVLIMI = nullptr;
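A toy model of the BBLocalInfo discipline described above, under the stated assumption that predecessor state may be consulted only while no instruction in the block has yet established local state:

    #include <cassert>
    #include <vector>

    // -1 means "invalid / not yet set". Returns the VL/VTYPE-like state seen
    // just before instruction `Idx`.
    static int stateBeforeInst(const std::vector<int> &LocalDefs, unsigned Idx,
                               int PredState) {
      int Local = -1;
      for (unsigned I = 0; I < Idx; ++I)
        if (LocalDefs[I] != -1)
          Local = LocalDefs[I]; // block-local knowledge wins once it exists
      return Local != -1 ? Local : PredState;
    }

    int main() {
      std::vector<int> Defs = {-1, 7, -1, 9};
      assert(stateBeforeInst(Defs, 0, /*PredState=*/3) == 3); // use predecessor
      assert(stateBeforeInst(Defs, 2, /*PredState=*/3) == 7); // ignore predecessor
      return 0;
    }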
@@ -1014,6 +1020,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
CurInfo = getInfoForVSETVLI(MI);
+ BBLocalInfo = getInfoForVSETVLI(MI);
PrevVSETVLIMI = &MI;
continue;
}
@@ -1043,12 +1050,22 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
// use the predecessor information.
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
"Expected a valid predecessor state.");
- if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
+ // Don't use predecessor information if there was an earlier instruction
+ // in this block that allowed a vsetvli to be skipped for load/store.
+ if (!(BBLocalInfo.isValid() &&
+ canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
+ needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
needVSETVLIPHI(NewInfo, MBB)) {
insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
CurInfo = NewInfo;
+ BBLocalInfo = NewInfo;
}
+
+ // We must update BBLocalInfo for every vector instruction.
+ if (!BBLocalInfo.isValid())
+ BBLocalInfo = NewInfo;
} else {
+ assert(BBLocalInfo.isValid());
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
// If this is a unit-stride or strided load/store, we may be able to use
@@ -1084,6 +1101,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (NeedInsertVSETVLI)
insertVSETVLI(MBB, MI, NewInfo, CurInfo);
CurInfo = NewInfo;
+ BBLocalInfo = NewInfo;
}
}
PrevVSETVLIMI = nullptr;
@@ -1094,6 +1112,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
CurInfo = VSETVLIInfo::getUnknown();
+ BBLocalInfo = VSETVLIInfo::getUnknown();
PrevVSETVLIMI = nullptr;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7baed2793e4e..55f4a19b79eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -654,8 +654,8 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
- } else if (Inst.Opc == RISCV::ADDUW) {
- BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
+ } else if (Inst.Opc == RISCV::ADD_UW) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), Result)
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
@@ -965,93 +965,29 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ if (MI.isMetaInstruction())
+ return 0;
+
unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- default: {
- if (MI.getParent() && MI.getParent()->getParent()) {
- const auto MF = MI.getMF();
- const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
- const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
- const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
- const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
- if (isCompressibleInst(MI, &ST, MRI, STI))
- return 2;
- }
- return get(Opcode).getSize();
- }
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- // These values are determined based on RISCVExpandAtomicPseudoInsts,
- // RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the
- // pseudos are expanded.
- case RISCV::PseudoCALLReg:
- case RISCV::PseudoCALL:
- case RISCV::PseudoJump:
- case RISCV::PseudoTAIL:
- case RISCV::PseudoLLA:
- case RISCV::PseudoLA:
- case RISCV::PseudoLA_TLS_IE:
- case RISCV::PseudoLA_TLS_GD:
- return 8;
- case RISCV::PseudoAtomicLoadNand32:
- case RISCV::PseudoAtomicLoadNand64:
- return 20;
- case RISCV::PseudoMaskedAtomicSwap32:
- case RISCV::PseudoMaskedAtomicLoadAdd32:
- case RISCV::PseudoMaskedAtomicLoadSub32:
- return 28;
- case RISCV::PseudoMaskedAtomicLoadNand32:
- return 32;
- case RISCV::PseudoMaskedAtomicLoadMax32:
- case RISCV::PseudoMaskedAtomicLoadMin32:
- return 44;
- case RISCV::PseudoMaskedAtomicLoadUMax32:
- case RISCV::PseudoMaskedAtomicLoadUMin32:
- return 36;
- case RISCV::PseudoCmpXchg32:
- case RISCV::PseudoCmpXchg64:
- return 16;
- case RISCV::PseudoMaskedCmpXchg32:
- return 32;
- case TargetOpcode::INLINEASM:
- case TargetOpcode::INLINEASM_BR: {
+ if (Opcode == TargetOpcode::INLINEASM ||
+ Opcode == TargetOpcode::INLINEASM_BR) {
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
- case RISCV::PseudoVSPILL2_M1:
- case RISCV::PseudoVSPILL2_M2:
- case RISCV::PseudoVSPILL2_M4:
- case RISCV::PseudoVSPILL3_M1:
- case RISCV::PseudoVSPILL3_M2:
- case RISCV::PseudoVSPILL4_M1:
- case RISCV::PseudoVSPILL4_M2:
- case RISCV::PseudoVSPILL5_M1:
- case RISCV::PseudoVSPILL6_M1:
- case RISCV::PseudoVSPILL7_M1:
- case RISCV::PseudoVSPILL8_M1:
- case RISCV::PseudoVRELOAD2_M1:
- case RISCV::PseudoVRELOAD2_M2:
- case RISCV::PseudoVRELOAD2_M4:
- case RISCV::PseudoVRELOAD3_M1:
- case RISCV::PseudoVRELOAD3_M2:
- case RISCV::PseudoVRELOAD4_M1:
- case RISCV::PseudoVRELOAD4_M2:
- case RISCV::PseudoVRELOAD5_M1:
- case RISCV::PseudoVRELOAD6_M1:
- case RISCV::PseudoVRELOAD7_M1:
- case RISCV::PseudoVRELOAD8_M1: {
- // The values are determined based on expandVSPILL and expandVRELOAD that
- // expand the pseudos depending on NF.
- unsigned NF = isRVVSpillForZvlsseg(Opcode)->first;
- return 4 * (2 * NF - 1);
- }
+
+ if (MI.getParent() && MI.getParent()->getParent()) {
+ const auto MF = MI.getMF();
+ const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
+ const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
+ const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
+ const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
+ if (isCompressibleInst(MI, &ST, MRI, STI))
+ return 2;
}
+ return get(Opcode).getSize();
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 64cd89cda06a..ee6a74b7f14f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1183,7 +1183,7 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
// destination.
// Define AsmString to print "call" when compile with -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
-let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0,
+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0,
mayStore = 0, mayLoad = 0 in
def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
let AsmString = "call\t$rd, $func";
@@ -1195,7 +1195,7 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
// if the offset fits in a signed 21-bit immediate.
// Define AsmString to print "call" when compile with -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
-let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
+let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in
def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
let AsmString = "call\t$func";
}
@@ -1220,7 +1220,7 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
// expand to auipc and jalr while encoding.
// Define AsmString to print "tail" when compile with -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
- isCodeGenOnly = 0 in
+ Size = 8, isCodeGenOnly = 0 in
def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> {
let AsmString = "tail\t$dst";
}
@@ -1235,28 +1235,28 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
-let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1,
+let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8,
isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
let AsmString = "jump\t$target, $rd";
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index ee10c3a54b2f..7d23dafb0346 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -188,6 +188,7 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
let hasSideEffects = 0;
}
+let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
@@ -242,27 +243,35 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
+let Size = 28 in
def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
PseudoMaskedAtomicSwap32>;
+let Size = 28 in
def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
PseudoMaskedAtomicLoadAdd32>;
+let Size = 28 in
def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
PseudoMaskedAtomicLoadSub32>;
+let Size = 32 in
def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
PseudoMaskedAtomicLoadNand32>;
+let Size = 44 in
def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
PseudoMaskedAtomicLoadMax32>;
+let Size = 44 in
def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
PseudoMaskedAtomicLoadMin32>;
+let Size = 36 in
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
PseudoMaskedAtomicLoadUMax32>;
+let Size = 36 in
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
PseudoMaskedAtomicLoadUMin32>;
@@ -276,6 +285,7 @@ class PseudoCmpXchg
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
+ let Size = 16;
}
// Ordering constants must be kept in sync with the AtomicOrdering enum in
@@ -304,6 +314,7 @@ def PseudoMaskedCmpXchg32
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
+ let Size = 32;
}
def : Pat<(int_riscv_masked_cmpxchg_i32
@@ -347,6 +358,7 @@ def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
/// 64-bit pseudo AMOs
+let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 4e7e251bc412..9087ed50f9fc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3836,7 +3836,7 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
multiclass VPatCompare_VI<string intrinsic, string inst,
- ImmLeaf ImmType = simm5_plus1> {
+ ImmLeaf ImmType> {
foreach vti = AllIntegerVectors in {
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX);
@@ -3899,11 +3899,13 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
foreach lmul = MxList in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1,
+ Size = !mul(4, !sub(!mul(nf, 2), 1)) in {
def "PseudoVSPILL" # nf # "_" # lmul.MX :
Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb), []>;
}
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1,
+ Size = !mul(4, !sub(!mul(nf, 2), 1)) in {
def "PseudoVRELOAD" # nf # "_" # lmul.MX :
Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb), []>;
}
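The TableGen expression above, !mul(4, !sub(!mul(nf, 2), 1)), is the same 4 * (2 * NF - 1) formula the deleted RISCVInstrInfo.cpp switch used: per the expandVSPILL/expandVRELOAD expansions, NF whole-register stores or loads interleaved with NF - 1 pointer increments, each four bytes. A quick check:

    #include <cassert>

    static unsigned zvlssegSpillSize(unsigned NF) { return 4 * (2 * NF - 1); }

    int main() {
      assert(zvlssegSpillSize(2) == 12); // store, add, store
      assert(zvlssegSpillSize(8) == 60);
      return 0;
    }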
@@ -4657,13 +4659,15 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmsgt", "PseudoVMSLT", AllIntegerVectors
defm : VPatBinarySwappedM_VV<"int_riscv_vmsgeu", "PseudoVMSLEU", AllIntegerVectors>;
defm : VPatBinarySwappedM_VV<"int_riscv_vmsge", "PseudoVMSLE", AllIntegerVectors>;
-// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
-// avoids the user needing to know that there is no vmslt(u).vi instruction.
-// Similar for vmsge(u).vx intrinsics using vmslt(u).vi.
-defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE">;
+// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16 and
+// non-zero. Zero can be .vx with x0. This avoids the user needing to know that
+// there is no vmslt(u).vi instruction. Similar for vmsge(u).vx intrinsics
+// using vmslt(u).vi.
+defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE", simm5_plus1_nonzero>;
defm : VPatCompare_VI<"int_riscv_vmsltu", "PseudoVMSLEU", simm5_plus1_nonzero>;
-defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT">;
+// We need to handle 0 for vmsge.vi using vmslt.vi because there is no vmsge.vx.
+defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT", simm5_plus1>;
defm : VPatCompare_VI<"int_riscv_vmsgeu", "PseudoVMSGTU", simm5_plus1_nonzero>;
//===----------------------------------------------------------------------===//
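The rewrite behind VPatCompare_VI is the identity x < imm <=> x <= imm - 1, applied over the simm5_plus1 range; zero is excluded for vmslt(u) because that case can use the .vx form with register x0 instead. A scalar check of the identity:

    #include <cassert>

    int main() {
      for (int X = -40; X <= 40; ++X)
        for (int Imm = -15; Imm <= 16; ++Imm)
          if (Imm != 0) // zero stays on the .vx path
            assert((X < Imm) == (X <= Imm - 1));
      return 0;
    }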
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index e452a84a9a6f..2b920d29ab81 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -539,7 +539,7 @@ defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">;
defm : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">;
defm : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">;
defm : VPatIntegerSetCCSDNode_VIPlus1<SETLT, "PseudoVMSLE",
- SplatPat_simm5_plus1>;
+ SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCSDNode_VIPlus1<SETULT, "PseudoVMSLEU",
SplatPat_simm5_plus1_nonzero>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 964f0fa54512..e71c498fd5f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -228,6 +228,7 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
SDTCisVT<4, XLenVT>]>;
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>;
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def SDTRVVVecReduce : SDTypeProfile<1, 5, [
@@ -832,7 +833,7 @@ foreach vti = AllIntegerVectors in {
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE", SETLT,
- SplatPat_simm5_plus1>;
+ SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLEU", SETULT,
SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSGT", SETGE,
@@ -861,6 +862,7 @@ defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;
// 12.12. Vector Widening Integer Multiply Instructions
defm : VPatBinaryWVL_VV_VX<riscv_vwmul_vl, "PseudoVWMUL">;
defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">;
+defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">;
// 12.13 Vector Single-Width Integer Multiply-Add Instructions
foreach vti = AllIntegerVectors in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index db3f5851879a..07884d35f63c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -337,13 +337,39 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
} // Predicates = [HasStdExtZba]
+let Predicates = [HasStdExtZba, IsRV64] in {
+def SLLI_UW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">,
+ Sched<[WriteShiftImm32, ReadShiftImm32]>;
+def ADD_UW : ALUW_rr<0b0000100, 0b000, "add.uw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SH1ADD_UW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+} // Predicates = [HasStdExtZba, IsRV64]
+
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
def ROR : ALU_rr<0b0110000, 0b101, "ror">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
+
+def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
+ Sched<[WriteRotateImm, ReadRotateImm]>;
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
+def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">,
+ Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
+def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
+ Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
+
+def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
+ Sched<[WriteRotateImm32, ReadRotateImm32]>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
+
let Predicates = [HasStdExtZbs] in {
def BCLR : ALU_rr<0b0100100, 0b001, "bclr">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
@@ -353,27 +379,7 @@ def BINV : ALU_rr<0b0110100, 0b001, "binv">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
def BEXT : ALU_rr<0b0100100, 0b101, "bext">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
-} // Predicates = [HasStdExtZbs]
-
-let Predicates = [HasStdExtZbp] in {
-def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
-def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbpOrZbkx] in {
-def XPERMN : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
-def XPERMB : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
-} // Predicates = [HasStdExtZbpOrZbkx]
-
-let Predicates = [HasStdExtZbp] in {
-def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb] in
-def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
- Sched<[WriteRotateImm, ReadRotateImm]>;
-
-let Predicates = [HasStdExtZbs] in {
def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">,
@@ -385,10 +391,42 @@ def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">,
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
+def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
+def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
+
def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>;
def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>;
+
+def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
+def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
+
+def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
+def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
+
+def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
+def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
+
+def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
+def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
+
+def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
+def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
+
+def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+// These instructions were named xperm.n and xperm.b in the last version of
+// the draft bit manipulation specification they were included in. However, we
+// use the mnemonics given to them in the ratified Zbkx extension.
+let Predicates = [HasStdExtZbpOrZbkx] in {
+def XPERM4 : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
+def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
+} // Predicates = [HasStdExtZbpOrZbkx]
+
let Predicates = [HasStdExtZbt] in {
def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">,
Sched<[]>;
@@ -402,6 +440,15 @@ def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
"$rd, $rs1, $rs3, $shamt">, Sched<[]>;
} // Predicates = [HasStdExtZbt]
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
+ "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
+ "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
+ "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+} // Predicates = [HasStdExtZbt, IsRV64]
+
let Predicates = [HasStdExtZbb] in {
def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
@@ -411,42 +458,45 @@ def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbm, IsRV64] in
-def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
- Sched<[]>;
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+ Sched<[WriteCLZ32, ReadCLZ32]>;
+def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+ Sched<[WriteCTZ32, ReadCTZ32]>;
+def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+ Sched<[WriteCPOP32, ReadCPOP32]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbb] in {
-def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
- Sched<[WriteIALU, ReadIALU]>;
-def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
- Sched<[WriteIALU, ReadIALU]>;
+def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+ Sched<[WriteIALU, ReadIALU]>;
+def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbr] in {
-def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
- Sched<[]>;
-def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
- Sched<[]>;
-def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
- Sched<[]>;
-} // Predicates = [HasStdExtZbr]
-
-let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+def CRC32_B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
Sched<[]>;
-
-let Predicates = [HasStdExtZbr] in {
-def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+def CRC32_H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
Sched<[]>;
-def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
- Sched<[]>;
-def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+def CRC32_W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
Sched<[]>;
+
+def CRC32C_B : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+ Sched<[]>;
+def CRC32C_H : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
+ Sched<[]>;
+def CRC32C_W : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+ Sched<[]>;
} // Predicates = [HasStdExtZbr]
-let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
- Sched<[]>;
+let Predicates = [HasStdExtZbr, IsRV64] in {
+def CRC32_D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+ Sched<[]>;
+
+def CRC32C_D : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbr, IsRV64]
let Predicates = [HasStdExtZbc] in {
def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">,
@@ -472,8 +522,6 @@ def MAXU : ALU_rr<0b0000101, 0b111, "maxu">,
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbp] in {
-def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
-def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbe] in {
@@ -483,15 +531,31 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
} // Predicates = [HasStdExtZbe]
+let Predicates = [HasStdExtZbe, IsRV64] in {
+// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
+// bextw in the 0.93 spec.
+def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
+def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
+} // Predicates = [HasStdExtZbe, IsRV64]
+
let Predicates = [HasStdExtZbpOrZbkb] in {
def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
} // Predicates = [HasStdExtZbpOrZbkb]
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
+def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+
let Predicates = [HasStdExtZbp] in
def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
+let Predicates = [HasStdExtZbp, IsRV64] in
+def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
+
let Predicates = [HasStdExtZbm, IsRV64] in {
+def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
+ Sched<[]>;
+
def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
} // Predicates = [HasStdExtZbm, IsRV64]
@@ -500,105 +564,18 @@ let Predicates = [HasStdExtZbf] in
def BFP : ALU_rr<0b0100100, 0b111, "bfp">,
Sched<[WriteBFP, ReadBFP, ReadBFP]>;
-let Predicates = [HasStdExtZbp] in {
-def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
-def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
-let Predicates = [HasStdExtZba, IsRV64] in {
-def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">,
- Sched<[WriteShiftImm32, ReadShiftImm32]>;
-def ADDUW : ALUW_rr<0b0000100, 0b000, "add.uw">,
- Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
-def SH1ADDUW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">,
- Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
-def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
- Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
-def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in
-def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
- Sched<[WriteRotateImm32, ReadRotateImm32]>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
-def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbt, IsRV64] in {
-def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
- "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
-def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
- "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
-def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
- "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
-} // Predicates = [HasStdExtZbt, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
- Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
- Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
- Sched<[WriteCPOP32, ReadCPOP32]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
-def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbe, IsRV64] in {
-// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
-// bextw in the 0.93 spec.
-def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
-def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
-} // Predicates = [HasStdExtZbe, IsRV64]
-
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
-def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in
-def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
-
let Predicates = [HasStdExtZbf, IsRV64] in
def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">,
Sched<[WriteBFP32, ReadBFP32, ReadBFP32]>;
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ZEXTH_RV32 : RVInstR<0b0000100, 0b100, OPC_OP, (outs GPR:$rd),
- (ins GPR:$rs1), "zext.h", "$rd, $rs1">,
- Sched<[WriteIALU, ReadIALU]> {
- let rs2 = 0b00000;
-}
+def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
- (ins GPR:$rs1), "zext.h", "$rd, $rs1">,
- Sched<[WriteIALU, ReadIALU]> {
- let rs2 = 0b00000;
-}
+def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
// We treat rev8 and orc.b as standalone instructions even though they use a
@@ -619,8 +596,8 @@ def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp] in {
-def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
- Sched<[WriteORCB, ReadORCB]>;
+def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+ Sched<[WriteORCB, ReadORCB]>;
} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbpOrZbkb] in
@@ -637,7 +614,7 @@ def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZba, IsRV64] in {
-def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>;
+def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>;
}
let Predicates = [HasStdExtZbp] in {
@@ -775,8 +752,10 @@ def : InstAlias<"gorcw $rd, $rs1, $shamt",
// Zbp is unratified and that it would likely adopt the already ratified Zbkx names.
// Thus current Zbp instructions are defined as aliases for Zbkx instructions.
let Predicates = [HasStdExtZbp] in {
- def : InstAlias<"xperm.b $rd, $rs1, $rs2", (XPERMB GPR:$rd, GPR:$rs1, GPR:$rs2)>;
- def : InstAlias<"xperm.n $rd, $rs1, $rs2", (XPERMN GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : InstAlias<"xperm.b $rd, $rs1, $rs2",
+ (XPERM8 GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : InstAlias<"xperm.n $rd, $rs1, $rs2",
+ (XPERM4 GPR:$rd, GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbs] in {
@@ -803,8 +782,22 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def : PatGprGpr<rotl, ROL>;
def : PatGprGpr<rotr, ROR>;
+
+def : PatGprImm<rotr, RORI, uimmlog2xlen>;
+// There's no encoding for roli in the 'B' extension as it can be
+// implemented with rori by negating the immediate.
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+ (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
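The ImmSubFromXLen pattern above relies on rotl(x, s) == rotr(x, XLEN - s); a scalar check for XLEN = 64 (helper names are illustrative):

    #include <cassert>
    #include <cstdint>

    static uint64_t rotr64(uint64_t X, unsigned S) {
      return (X >> (S & 63)) | (X << ((64 - S) & 63));
    }
    static uint64_t rotl64(uint64_t X, unsigned S) {
      return (X << (S & 63)) | (X >> ((64 - S) & 63));
    }

    int main() {
      uint64_t V = 0x0123456789abcdefULL;
      for (unsigned S = 1; S < 64; ++S) // S == 0 is trivially the identity
        assert(rotl64(V, S) == rotr64(V, 64 - S));
      return 0;
    }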
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
+def : PatGprGpr<riscv_rolw, ROLW>;
+def : PatGprGpr<riscv_rorw, RORW>;
+def : PatGprImm<riscv_rorw, RORIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+ (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
+
let Predicates = [HasStdExtZbs] in {
def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
(BCLR GPR:$rs1, GPR:$rs2)>;
@@ -852,48 +845,62 @@ def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
(BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>;
}
-// There's no encoding for roli in the the 'B' extension as it can be
-// implemented with rori by negating the immediate.
-let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
-def : PatGprImm<rotr, RORI, uimmlog2xlen>;
-def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
- (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
-
+let Predicates = [HasStdExtZbbOrZbp] in {
// We treat orc.b as a separate instruction, so match it directly. We also
// lower the Zbb orc.b intrinsic to this.
-def : Pat<(riscv_gorc GPR:$rs1, 7), (ORCB GPR:$rs1)>;
+def : Pat<(riscv_gorc GPR:$rs1, 7), (ORC_B GPR:$rs1)>;
+}
+
+let Predicates = [HasStdExtZbpOrZbkb] in {
+// We treat brev8 as a separate instruction, so match it directly. We also
+// use this for brev8 when lowering bitreverse with Zbkb.
+def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
+
+// We treat zip and unzip as separate instructions, so match it directly.
+def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
+def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
}
let Predicates = [HasStdExtZbp] in {
def : PatGprGpr<riscv_grev, GREV>;
def : PatGprGpr<riscv_gorc, GORC>;
+def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
+def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
+
def : PatGprGpr<riscv_shfl, SHFL>;
def : PatGprGpr<riscv_unshfl, UNSHFL>;
-def : PatGprGpr<int_riscv_xperm_n, XPERMN>;
-def : PatGprGpr<int_riscv_xperm_b, XPERMB>;
-def : PatGprGpr<int_riscv_xperm_h, XPERMH>;
def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>;
def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>;
-def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
-def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
-// We treat brev8 as a separate instruction, so match it directly.
-def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
+def : PatGprGpr<int_riscv_xperm_n, XPERM4>;
+def : PatGprGpr<int_riscv_xperm_b, XPERM8>;
+def : PatGprGpr<int_riscv_xperm_h, XPERM_H>;
} // Predicates = [HasStdExtZbp]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : PatGprGpr<riscv_grevw, GREVW>;
+def : PatGprGpr<riscv_gorcw, GORCW>;
+def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
+def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
+
+// FIXME: Move to DAG combine.
+def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
+def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
+
+def : PatGprGpr<riscv_shflw, SHFLW>;
+def : PatGprGpr<riscv_unshflw, UNSHFLW>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
let Predicates = [HasStdExtZbp, IsRV64] in
-def : PatGprGpr<int_riscv_xperm_w, XPERMW>;
+def : PatGprGpr<int_riscv_xperm_w, XPERM_W>;
let Predicates = [HasStdExtZbp, IsRV32] in {
+// FIXME: Move to DAG combine.
def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
// We treat rev8 as a separate instruction, so match it directly.
def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>;
-
-// We treat zip and unzip as separate instructions, so match it directly.
-def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
-def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV32]
let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -942,15 +949,34 @@ def : Pat<(riscv_fsl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
(FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbt]
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+// We can use FSRIW for FSLW by immediate if we subtract the immediate from
+// 32 and swap the operands.
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
+} // Predicates = [HasStdExtZbt, IsRV64]
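The FSRIW-for-FSLW substitution above rests on the funnel-shift identity fsl(a, b, s) == fsr(b, a, 32 - s); a sketch under assumed fslw/fsrw semantics for 0 < s < 32 (not the instruction definitions themselves):

    #include <cassert>
    #include <cstdint>

    static uint32_t fsl32(uint32_t A, uint32_t B, unsigned S) {
      return (A << S) | (B >> (32 - S)); // A supplies the high bits
    }
    static uint32_t fsr32(uint32_t A, uint32_t B, unsigned S) {
      return (A >> S) | (B << (32 - S)); // B supplies the high bits
    }

    int main() {
      uint32_t A = 0x89abcdef, B = 0x01234567;
      for (unsigned S = 1; S < 32; ++S)
        assert(fsl32(A, B, S) == fsr32(B, A, 32 - S));
      return 0;
    }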
+
let Predicates = [HasStdExtZbb] in {
def : PatGpr<ctlz, CLZ>;
def : PatGpr<cttz, CTZ>;
def : PatGpr<ctpop, CPOP>;
} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<riscv_clzw, CLZW>;
+def : PatGpr<riscv_ctzw, CTZW>;
+def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
let Predicates = [HasStdExtZbb] in {
-def : Pat<(sext_inreg GPR:$rs1, i8), (SEXTB GPR:$rs1)>;
-def : Pat<(sext_inreg GPR:$rs1, i16), (SEXTH GPR:$rs1)>;
+def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>;
}
let Predicates = [HasStdExtZbb] in {
@@ -968,35 +994,49 @@ let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+let Predicates = [HasStdExtZbpOrZbkb] in {
+def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbpOrZbkb]
+
let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
+def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
+ (PACK GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
+ (and GPR:$rs1, 0x000000000000FFFF)),
+ i32)),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
+ (and GPR:$rs1, 0x000000000000FFFF))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+}
+
let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
-def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
- (PACK GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in
+let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbpOrZbkb] in {
-def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
- (and GPR:$rs1, 0x00FF)),
- (PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
- (and GPR:$rs1, 0x00FF)),
- (PACKH GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbpOrZbkb]
+def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
+ (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
+ (PACKUW GPR:$rs1, GPR:$rs2)>;
+}
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
-def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV32 GPR:$rs)>;
+def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV32 GPR:$rs)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
-def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>;
+def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>;
// Pattern to exclude simm12 immediates from matching.
def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
@@ -1074,80 +1114,26 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
let Predicates = [HasStdExtZba, IsRV64] in {
def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
- (SLLIUW GPR:$rs1, uimm5:$shamt)>;
+ (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
- (ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADDUW GPR:$rs, X0)>;
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
- (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
- (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
- (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
- (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
- (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
- (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-def : PatGprGpr<riscv_rolw, ROLW>;
-def : PatGprGpr<riscv_rorw, RORW>;
-def : PatGprImm<riscv_rorw, RORIW, uimm5>;
-def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
- (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
-} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
-def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
-def : PatGprGpr<riscv_grevw, GREVW>;
-def : PatGprGpr<riscv_gorcw, GORCW>;
-def : PatGprGpr<riscv_shflw, SHFLW>;
-def : PatGprGpr<riscv_unshflw, UNSHFLW>;
-def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
-def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
- (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2),
- (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt),
- (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
-// We can use FSRIW for FSLW by immediate if we subtract the immediate from
-// 32 and swap the operands.
-def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
- (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
-} // Predicates = [HasStdExtZbt, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : PatGpr<riscv_clzw, CLZW>;
-def : PatGpr<riscv_ctzw, CTZW>;
-def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
-def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
- (and GPR:$rs1, 0x000000000000FFFF)),
- i32)),
- (PACKW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
- (and GPR:$rs1, 0x000000000000FFFF))),
- (PACKW GPR:$rs1, GPR:$rs2)>;
-}
-
-let Predicates = [HasStdExtZbp, IsRV64] in
-def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
- (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
- (PACKUW GPR:$rs1, GPR:$rs2)>;
-
-
let Predicates = [HasStdExtZbcOrZbkc] in {
def : PatGprGpr<int_riscv_clmul, CLMUL>;
def : PatGprGpr<int_riscv_clmulh, CLMULH>;
@@ -1167,17 +1153,17 @@ def : PatGprGpr<riscv_bdecompressw, BDECOMPRESSW>;
} // Predicates = [HasStdExtZbe, IsRV64]
let Predicates = [HasStdExtZbr] in {
-def : PatGpr<int_riscv_crc32_b, CRC32B>;
-def : PatGpr<int_riscv_crc32_h, CRC32H>;
-def : PatGpr<int_riscv_crc32_w, CRC32W>;
-def : PatGpr<int_riscv_crc32c_b, CRC32CB>;
-def : PatGpr<int_riscv_crc32c_h, CRC32CH>;
-def : PatGpr<int_riscv_crc32c_w, CRC32CW>;
+def : PatGpr<int_riscv_crc32_b, CRC32_B>;
+def : PatGpr<int_riscv_crc32_h, CRC32_H>;
+def : PatGpr<int_riscv_crc32_w, CRC32_W>;
+def : PatGpr<int_riscv_crc32c_b, CRC32C_B>;
+def : PatGpr<int_riscv_crc32c_h, CRC32C_H>;
+def : PatGpr<int_riscv_crc32c_w, CRC32C_W>;
} // Predicates = [HasStdExtZbr]
let Predicates = [HasStdExtZbr, IsRV64] in {
-def : PatGpr<int_riscv_crc32_d, CRC32D>;
-def : PatGpr<int_riscv_crc32c_d, CRC32CD>;
+def : PatGpr<int_riscv_crc32_d, CRC32_D>;
+def : PatGpr<int_riscv_crc32c_d, CRC32C_D>;
} // Predicates = [HasStdExtZbr, IsRV64]
let Predicates = [HasStdExtZbf] in
@@ -1186,16 +1172,7 @@ def : PatGprGpr<riscv_bfp, BFP>;
let Predicates = [HasStdExtZbf, IsRV64] in
def : PatGprGpr<riscv_bfpw, BFPW>;
-let Predicates = [HasStdExtZbkb] in {
-def : PatGpr<int_riscv_brev8, BREV8>;
-} // Predicates = [HasStdExtZbkb]
-
-let Predicates = [HasStdExtZbkb, IsRV32] in {
-def : PatGpr<int_riscv_zip, ZIP_RV32>;
-def : PatGpr<int_riscv_unzip, UNZIP_RV32>;
-} // Predicates = [HasStdExtZbkb, IsRV32]
-
let Predicates = [HasStdExtZbkx] in {
-def : PatGprGpr<int_riscv_xperm4, XPERMN>;
-def : PatGprGpr<int_riscv_xperm8, XPERMB>;
+def : PatGprGpr<int_riscv_xperm4, XPERM4>;
+def : PatGprGpr<int_riscv_xperm8, XPERM8>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index dfd0c74ee26c..a2753c132354 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -29,14 +29,14 @@ def riscv_fmv_x_anyexth
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
@@ -98,7 +98,7 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
@@ -113,7 +113,7 @@ def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
let mayRaiseFPException = 0 in
def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
@@ -146,23 +146,23 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
+let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
-} // Predicates = [HasStdExtZfhmin, HasStdExtD]
+} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
@@ -177,14 +177,14 @@ def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
let usesCustomInserter = 1 in {
def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>;
def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
}
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -281,7 +281,7 @@ def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>;
def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
/// Loads
defm : LdPat<load, FLH, f16>;
@@ -299,7 +299,7 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
@@ -351,7 +351,7 @@ def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
+let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
@@ -361,4 +361,4 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
-} // Predicates = [HasStdExtZfhmin, HasStdExtD]
+} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index 4a41cddedc71..e4e07f4789a6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -1,4 +1,4 @@
-//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions - tablegen -*===//
+//===- RISCVInstrInfoZk.td - RISC-V 'Zk' instructions ------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
index 12ec52925798..715d92b036e3 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -99,9 +99,9 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::SLTI:
case RISCV::SLTU:
case RISCV::SLTIU:
- case RISCV::SEXTB:
- case RISCV::SEXTH:
- case RISCV::ZEXTH_RV64:
+ case RISCV::SEXT_B:
+ case RISCV::SEXT_H:
+ case RISCV::ZEXT_H_RV64:
return true;
// shifting right sufficiently makes the value 32-bit sign-extended
case RISCV::SRAI:
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 044dda0a1ccc..34c6e8e684ac 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -195,6 +195,7 @@ public:
return 0;
}
+ unsigned getMinVLen() const { return ZvlLen; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isRegisterReservedByUser(Register i) const {
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index e950f9582f09..4d69040a4508 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -8,6 +8,7 @@
#include "MCTargetDesc/SparcFixupKinds.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -131,6 +132,23 @@ namespace {
return Sparc::NumTargetFixupKinds;
}
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
+ unsigned Type;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/Sparc.def"
+#undef ELF_RELOC
+ .Case("BFD_RELOC_NONE", ELF::R_SPARC_NONE)
+ .Case("BFD_RELOC_8", ELF::R_SPARC_8)
+ .Case("BFD_RELOC_16", ELF::R_SPARC_16)
+ .Case("BFD_RELOC_32", ELF::R_SPARC_32)
+ .Case("BFD_RELOC_64", ELF::R_SPARC_64)
+ .Default(-1u);
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+ }
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo InfosBE[Sparc::NumTargetFixupKinds] = {
// name offset bits flags
@@ -216,6 +234,11 @@ namespace {
{ "fixup_sparc_tls_le_lox10", 0, 0, 0 }
};
+ // Fixup kinds from .reloc directive are like R_SPARC_NONE. They do
+ // not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -229,6 +252,8 @@ namespace {
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return true;
switch ((Sparc::Fixups)Fixup.getKind()) {
default:
return false;
@@ -299,6 +324,8 @@ namespace {
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return;
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index bc508b45c3bd..02261dc5c4cd 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -42,6 +42,9 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Fixup.getValue())) {
if (SExpr->getKind() == SparcMCExpr::VK_Sparc_R_DISP32)
@@ -68,6 +71,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_NONE: return ELF::R_SPARC_NONE;
case FK_Data_1: return ELF::R_SPARC_8;
case FK_Data_2: return ((Fixup.getOffset() % 2)
? ELF::R_SPARC_UA16
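The `.reloc` support added above follows the convention shared by other LLVM backends: a relocation named in a `.reloc` directive is encoded by getFixupKind() as a fixup kind at or above FirstLiteralRelocationKind, passed through applyFixup() untouched, and decoded back to the raw ELF relocation type by the object writer. A minimal sketch of that round trip (simplified free functions, not the actual patch code):

#include "llvm/MC/MCFixup.h"
#include <cassert>

// Encode: a literal relocation is the raw ELF type offset into the fixup
// kind space reserved above FirstLiteralRelocationKind.
static llvm::MCFixupKind encodeLiteralReloc(unsigned ElfRelocType) {
  return static_cast<llvm::MCFixupKind>(llvm::FirstLiteralRelocationKind +
                                        ElfRelocType);
}

// Decode: the object writer recovers the ELF relocation type unchanged,
// which is why getRelocType() above returns Kind - FirstLiteralRelocationKind.
static unsigned decodeLiteralReloc(llvm::MCFixupKind Kind) {
  assert(Kind >= llvm::FirstLiteralRelocationKind && "not a literal reloc");
  return Kind - llvm::FirstLiteralRelocationKind;
}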
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index ccc7d0737f53..610627e7e3f0 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -80,6 +80,88 @@ MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
}
}
+namespace {
+struct SZFrameSortingObj {
+ bool IsValid = false; // True if we care about this Object.
+ uint32_t ObjectIndex = 0; // Index of Object into MFI list.
+ uint64_t ObjectSize = 0; // Size of Object in bytes.
+ uint32_t D12Count = 0; // 12-bit displacement only.
+ uint32_t DPairCount = 0; // 12 or 20 bit displacement.
+};
+typedef std::vector<SZFrameSortingObj> SZFrameObjVec;
+} // namespace
+
+// TODO: Move to base class.
+void SystemZELFFrameLowering::orderFrameObjects(
+ const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // Make a vector of sorting objects to track all MFI objects and mark those
+ // to be sorted as valid.
+ if (ObjectsToAllocate.size() <= 1)
+ return;
+ SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd());
+ for (auto &Obj : ObjectsToAllocate) {
+ SortingObjects[Obj].IsValid = true;
+ SortingObjects[Obj].ObjectIndex = Obj;
+ SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj);
+ }
+
+ // Examine uses for each object and record short (12-bit) and "pair"
+ // displacement types.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isFI())
+ continue;
+ int Index = MO.getIndex();
+ if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
+ SortingObjects[Index].IsValid) {
+ if (TII->hasDisplacementPairInsn(MI.getOpcode()))
+ SortingObjects[Index].DPairCount++;
+ else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset))
+ SortingObjects[Index].D12Count++;
+ }
+ }
+ }
+
+ // Sort all objects for short/paired displacements, which should be
+ // sufficient since frame objects typically all fall within the long
+ // displacement range. Sorting works by computing the "density" as
+ // Count / ObjectSize. The comparison of two such fractions is refactored
+ // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to
+ // eliminate the (fp) divisions. A higher density object needs to go later
+ // in the list in order for it to end up lower on the stack.
+ auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
+ // Put all invalid and variable sized objects at the end.
+ if (!A.IsValid || !B.IsValid)
+ return A.IsValid;
+ if (!A.ObjectSize || !B.ObjectSize)
+ return A.ObjectSize > 0;
+ uint64_t ADensityCmp = A.D12Count * B.ObjectSize;
+ uint64_t BDensityCmp = B.D12Count * A.ObjectSize;
+ if (ADensityCmp != BDensityCmp)
+ return ADensityCmp < BDensityCmp;
+ return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize;
+ };
+ std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12);
+
+ // Now modify the original list to represent the final order that
+ // we want.
+ unsigned Idx = 0;
+ for (auto &Obj : SortingObjects) {
+ // All invalid items are sorted at the end, so it's safe to stop.
+ if (!Obj.IsValid)
+ break;
+ ObjectsToAllocate[Idx++] = Obj.ObjectIndex;
+ }
+}
+
bool SystemZFrameLowering::hasReservedCallFrame(
const MachineFunction &MF) const {
// The ELF ABI requires us to allocate 160 bytes of stack space for the
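The comparator above works because, for positive sizes, A.D12Count / A.ObjectSize < B.D12Count / B.ObjectSize exactly when A.D12Count * B.ObjectSize < B.D12Count * A.ObjectSize, so the densities can be ordered with integer multiplies only. A worked sketch with hypothetical numbers (not from the patch):

#include <cstdint>

// Object A: 2 short-displacement uses over 8 bytes  -> density 0.25
// Object B: 3 short-displacement uses over 24 bytes -> density 0.125
uint64_t AD12 = 2, ASize = 8;
uint64_t BD12 = 3, BSize = 24;
// Cross-multiplied: 2 * 24 = 48 vs. 3 * 8 = 24. A's density is higher, so
// the comparator orders A after B, and A ends up lower on the stack where
// short displacements can still reach it.
bool ABeforeB = (AD12 * BSize) < (BD12 * ASize); // false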
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 3a1af888d8f9..2b3d7efed53b 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -77,6 +77,9 @@ public:
bool hasFP(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
+ void
+ orderFrameObjects(const MachineFunction &MF,
+ SmallVectorImpl<int> &ObjectsToAllocate) const override;
// Return the byte offset from the incoming stack pointer of Reg's
// ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index a8ddb8c62d18..de446f33f5f1 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -443,6 +443,11 @@ public:
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
+ bool ShouldShrinkFPConstant(EVT VT) const override {
+ // Do not shrink 64-bit FP constpool entries since LDEB is slower than
+ // LD, and having the full constant in memory enables reg/mem opcodes.
+ return VT != MVT::f64;
+ }
bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6db9bf3056b7..4b6aa60f5d55 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1652,6 +1652,13 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
return 0;
}
+bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const {
+ const MCInstrDesc &MCID = get(Opcode);
+ if (MCID.TSFlags & SystemZII::Has20BitOffset)
+ return SystemZ::getDisp12Opcode(Opcode) >= 0;
+ return SystemZ::getDisp20Opcode(Opcode) >= 0;
+}
+
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
case SystemZ::L: return SystemZ::LT;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 396f56c7f59c..9e5b2729a707 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -312,6 +312,9 @@ public:
// exists.
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+ // Return true if Opcode has a mapping between 12-bit and 20-bit
+ // displacement forms.
+ bool hasDisplacementPairInsn(unsigned Opcode) const;
+
// If Opcode is a load instruction that has a LOAD AND TEST form,
// return the opcode for the testing form, otherwise return 0.
unsigned getLoadAndTest(unsigned Opcode) const;
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index 0412e524f800..0f1655718481 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -167,3 +167,41 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) {
llvm_unreachable("unexpected type");
}
}
+
+void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
+ const SmallVector<MVT, 1> &VTs) {
+ assert(!Sym->getType());
+
+ // Tables are represented as Arrays in LLVM IR; therefore they reach this
+ // point as aggregate Array types with an element type that is a reference
+ // type.
+ wasm::ValType Type;
+ bool IsTable = false;
+ if (GlobalVT->isArrayTy() &&
+ WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
+ MVT VT;
+ IsTable = true;
+ switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
+ VT = MVT::funcref;
+ break;
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
+ VT = MVT::externref;
+ break;
+ default:
+ report_fatal_error("unhandled address space type");
+ }
+ Type = WebAssembly::toValType(VT);
+ } else if (VTs.size() == 1) {
+ Type = WebAssembly::toValType(VTs[0]);
+ } else
+ report_fatal_error("Aggregate globals not yet implemented");
+
+ if (IsTable) {
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ Sym->setTableType(Type);
+ } else {
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
+ }
+}
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 042d51c7d6cb..cdb95d48398d 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -17,6 +17,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -41,6 +43,43 @@ enum class BlockType : unsigned {
Multivalue = 0xffff,
};
+enum WasmAddressSpace : unsigned {
+ // Default address space, for pointers to linear memory (stack, heap, data).
+ WASM_ADDRESS_SPACE_DEFAULT = 0,
+ // A non-integral address space for pointers to named objects outside of
+ // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
+ // to these pointers are lowered to global.get / global.set or local.get /
+ // local.set, as appropriate.
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
+};
+
+inline bool isDefaultAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_DEFAULT;
+}
+inline bool isWasmVarAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_VAR;
+}
+inline bool isValidAddressSpace(unsigned AS) {
+ return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
+}
+inline bool isFuncrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+}
+inline bool isExternrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
+}
+inline bool isRefType(const Type *Ty) {
+ return isFuncrefType(Ty) || isExternrefType(Ty);
+}
+
// Convert StringRef to ValType / HeapType / BlockType
Optional<wasm::ValType> parseType(StringRef Type);
@@ -68,6 +107,10 @@ wasm::ValType toValType(MVT Type);
// Convert a register class to a wasm ValType.
wasm::ValType regClassToValType(unsigned RC);
+/// Sets a Wasm Symbol Type.
+void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
+ const SmallVector<MVT, 1> &VTs);
+
} // end namespace WebAssembly
} // end namespace llvm
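With the helpers moved into this header, a caller can classify a global the same way wasmSymbolSetType() does. A small usage sketch (hypothetical function, assuming only this header):

#include "Utils/WebAssemblyTypeUtilities.h"

// A Wasm table reaches the backend as an LLVM array whose element type is a
// reference-typed pointer (funcref or externref address space).
static bool isWasmTableType(const llvm::Type *GlobalVT) {
  return GlobalVT->isArrayTy() &&
         llvm::WebAssembly::isRefType(GlobalVT->getArrayElementType());
}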
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index 57e40f6cd8d7..cdfc758db7ac 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -15,7 +15,6 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -30,43 +29,6 @@ class WebAssemblySubtarget;
namespace WebAssembly {
-enum WasmAddressSpace : unsigned {
- // Default address space, for pointers to linear memory (stack, heap, data).
- WASM_ADDRESS_SPACE_DEFAULT = 0,
- // A non-integral address space for pointers to named objects outside of
- // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
- // to these pointers are lowered to global.get / global.set or local.get /
- // local.set, as appropriate.
- WASM_ADDRESS_SPACE_VAR = 1,
- // A non-integral address space for externref values
- WASM_ADDRESS_SPACE_EXTERNREF = 10,
- // A non-integral address space for funcref values
- WASM_ADDRESS_SPACE_FUNCREF = 20,
-};
-
-inline bool isDefaultAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_DEFAULT;
-}
-inline bool isWasmVarAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_VAR;
-}
-inline bool isValidAddressSpace(unsigned AS) {
- return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
-}
-inline bool isFuncrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
-}
-inline bool isExternrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
-}
-inline bool isRefType(const Type *Ty) {
- return isFuncrefType(Ty) || isExternrefType(Ty);
-}
-
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index e3af6b2662ef..bf326e5106be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -181,17 +181,11 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (!Sym->getType()) {
const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering();
- SmallVector<EVT, 1> VTs;
- ComputeValueVTs(TLI, GV->getParent()->getDataLayout(), GV->getValueType(),
- VTs);
- if (VTs.size() != 1 ||
- TLI.getNumRegisters(GV->getParent()->getContext(), VTs[0]) != 1)
- report_fatal_error("Aggregate globals not yet implemented");
- MVT VT = TLI.getRegisterType(GV->getParent()->getContext(), VTs[0]);
- bool Mutable = true;
- wasm::ValType Type = WebAssembly::toValType(VT);
- Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable});
+ SmallVector<MVT, 1> VTs;
+ Type *GlobalVT = GV->getValueType();
+ computeLegalValueVTs(TLI, GV->getParent()->getContext(),
+ GV->getParent()->getDataLayout(), GlobalVT, VTs);
+ WebAssembly::wasmSymbolSetType(Sym, GlobalVT, VTs);
}
// If the GlobalVariable refers to a table, we handle it here instead of
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 406edef8ff3f..8ddd414b043a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index c45f7d7176b5..01baa3d9389d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -19,7 +19,7 @@
#include "WebAssemblyFrameLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index fe656753889f..b6c43be03aba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -560,6 +560,9 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
NEltArg = NEltArg.getValue() + 1;
FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
}
+ // In case the callee has the 'noreturn' attribute, we need to remove it,
+ // because we expect invoke wrappers to return.
+ FnAttrs.removeAttribute(Attribute::NoReturn);
// Reconstruct the AttributesList based on the vector we constructed.
AttributeList NewCallAL = AttributeList::get(
@@ -630,9 +633,9 @@ static bool canLongjmp(const Value *Callee) {
// Exception-catching related functions
//
- // We intentionally excluded __cxa_end_catch here even though it surely cannot
- // longjmp, in order to maintain the unwind relationship from all existing
- // catchpads (and calls within them) to catch.dispatch.longjmp.
+ // We intentionally treat __cxa_end_catch as longjmpable in Wasm SjLj even though
+ // it surely cannot longjmp, in order to maintain the unwind relationship from
+ // all existing catchpads (and calls within them) to catch.dispatch.longjmp.
//
// In Wasm EH + Wasm SjLj, we
// 1. Make all catchswitch and cleanuppad that unwind to caller unwind to
@@ -663,6 +666,8 @@ static bool canLongjmp(const Value *Callee) {
//
// The comment block in findWasmUnwindDestinations() in
// SelectionDAGBuilder.cpp is addressing a similar problem.
+ if (CalleeName == "__cxa_end_catch")
+ return WebAssembly::WasmEnableSjLj;
if (CalleeName == "__cxa_begin_catch" ||
CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
CalleeName == "__clang_call_terminate")
@@ -869,15 +874,17 @@ static void nullifySetjmp(Function *F) {
Function *SetjmpF = M.getFunction("setjmp");
SmallVector<Instruction *, 1> ToErase;
- for (User *U : SetjmpF->users()) {
- auto *CI = dyn_cast<CallInst>(U);
- // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
- // we don't support two being used together yet.
- if (!CI)
- report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
- BasicBlock *BB = CI->getParent();
+ for (User *U : make_early_inc_range(SetjmpF->users())) {
+ auto *CB = cast<CallBase>(U);
+ BasicBlock *BB = CB->getParent();
if (BB->getParent() != F) // in other function
continue;
+ CallInst *CI = nullptr;
+ // setjmp cannot throw. So if it is an invoke, lower it to a call
+ if (auto *II = dyn_cast<InvokeInst>(CB))
+ CI = llvm::changeToCall(II);
+ else
+ CI = cast<CallInst>(CB);
ToErase.push_back(CI);
CI->replaceAllUsesWith(IRB.getInt32(0));
}
@@ -1313,10 +1320,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
SmallVector<PHINode *, 4> SetjmpRetPHIs;
Function *SetjmpF = M.getFunction("setjmp");
for (auto *U : make_early_inc_range(SetjmpF->users())) {
- auto *CB = dyn_cast<CallBase>(U);
+ auto *CB = cast<CallBase>(U);
BasicBlock *BB = CB->getParent();
if (BB->getParent() != &F) // in other function
continue;
+ if (CB->getOperandBundle(LLVMContext::OB_funclet))
+ report_fatal_error(
+ "setjmp within a catch clause is not supported in Wasm EH");
CallInst *CI = nullptr;
// setjmp cannot throw. So if it is an invoke, lower it to a call
@@ -1815,10 +1825,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
BasicBlock *UnwindDest = nullptr;
if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]);
- while (!UnwindDest && FromPad) {
+ while (!UnwindDest) {
if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) {
UnwindDest = CPI->getCatchSwitch()->getUnwindDest();
- FromPad = nullptr; // stop searching
+ break;
} else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
// getCleanupRetUnwindDest() can return nullptr when
// 1. This cleanuppad's matching cleanupret uwninds to caller
@@ -1826,7 +1836,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
// unreachable.
// In case of 2, we need to traverse the parent pad chain.
UnwindDest = getCleanupRetUnwindDest(CPI);
- FromPad = cast<Instruction>(CPI->getParentPad());
+ Value *ParentPad = CPI->getParentPad();
+ if (isa<ConstantTokenNone>(ParentPad))
+ break;
+ FromPad = cast<Instruction>(ParentPad);
}
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
index 8ff916c28c4e..6fd87f10150d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -14,7 +14,7 @@
///
//===----------------------------------------------------------------------===//
-#include "Utils/WebAssemblyUtilities.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/IR/InstIterator.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 09bccef17ab0..2e6027a5605c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -59,39 +59,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
SmallVector<MVT, 1> VTs;
computeLegalValueVTs(CurrentFunc, TM, GlobalVT, VTs);
- // Tables are represented as Arrays in LLVM IR therefore
- // they reach this point as aggregate Array types with an element type
- // that is a reference type.
- wasm::ValType Type;
- bool IsTable = false;
- if (GlobalVT->isArrayTy() &&
- WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
- MVT VT;
- IsTable = true;
- switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
- VT = MVT::funcref;
- break;
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
- VT = MVT::externref;
- break;
- default:
- report_fatal_error("unhandled address space type");
- }
- Type = WebAssembly::toValType(VT);
- } else if (VTs.size() == 1) {
- Type = WebAssembly::toValType(VTs[0]);
- } else
- report_fatal_error("Aggregate globals not yet implemented");
-
- if (IsTable) {
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- WasmSym->setTableType(Type);
- } else {
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(
- wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
- }
+ WebAssembly::wasmSymbolSetType(WasmSym, GlobalVT, VTs);
}
return WasmSym;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 00b11321fdb2..ea80e96d50de 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -30,22 +30,28 @@ void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
WARegs.resize(MRI.getNumVirtRegs(), Reg);
}
-void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
+void llvm::computeLegalValueVTs(const WebAssemblyTargetLowering &TLI,
+ LLVMContext &Ctx, const DataLayout &DL,
Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
- const DataLayout &DL(F.getParent()->getDataLayout());
- const WebAssemblyTargetLowering &TLI =
- *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
SmallVector<EVT, 4> VTs;
ComputeValueVTs(TLI, DL, Ty, VTs);
for (EVT VT : VTs) {
- unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
- MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(Ctx, VT);
+ MVT RegisterVT = TLI.getRegisterType(Ctx, VT);
for (unsigned I = 0; I != NumRegs; ++I)
ValueVTs.push_back(RegisterVT);
}
}
+void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+ const DataLayout &DL(F.getParent()->getDataLayout());
+ const WebAssemblyTargetLowering &TLI =
+ *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+ computeLegalValueVTs(TLI, F.getContext(), DL, Ty, ValueVTs);
+}
+
void llvm::computeSignatureVTs(const FunctionType *Ty,
const Function *TargetFunc,
const Function &ContextFunc,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 3fa2d0c8a2f2..413d0d1dc554 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -166,6 +166,10 @@ public:
void setWasmEHFuncInfo(WasmEHFuncInfo *Info) { WasmEHInfo = Info; }
};
+void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI,
+ LLVMContext &Ctx, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs);
+
void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
SmallVectorImpl<MVT> &ValueVTs);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aff72452af6c..90753b5b4d33 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -805,8 +805,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
- for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
- MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
+ MVT::v4f32, MVT::v8f32, MVT::v16f32,
+ MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
@@ -1094,13 +1095,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (VT == MVT::v2i64) continue;
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
- setOperationAction(ISD::FSHL, MVT::v16i8, Custom);
- setOperationAction(ISD::FSHR, MVT::v16i8, Custom);
- setOperationAction(ISD::FSHL, MVT::v4i32, Custom);
- setOperationAction(ISD::FSHR, MVT::v4i32, Custom);
-
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
@@ -1958,6 +1956,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
addRegisterClass(MVT::f16, &X86::FR16XRegClass);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
@@ -12571,6 +12571,8 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
+ unsigned NumElts = VT.getVectorNumElements();
+
switch (VT.SimpleTy) {
case MVT::v4i64:
case MVT::v8i32:
@@ -12629,8 +12631,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return Masked;
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
- MVT IntegerType =
- MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
@@ -12699,8 +12700,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// Otherwise load an immediate into a GPR, cast to k-register, and use a
// masked move.
- MVT IntegerType =
- MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
@@ -29843,7 +29843,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
- VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
+ VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
+ VT == MVT::v16i32) &&
"Unexpected funnel shift type!");
// fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw.
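The identity in the comment above is easiest to check on scalars: concatenate the two sources into one double-width lane, shift the wide value by the masked amount, then keep the high half. A one-lane sketch for 8-bit elements (hypothetical helper, not part of the patch):

#include <cstdint>

// fshl(x, y, z) on a single 8-bit lane via a 16-bit "unpacked" value.
static uint8_t fshl8(uint8_t x, uint8_t y, unsigned z) {
  z &= 7;                                  // z & (bw - 1)
  uint16_t Wide = (uint16_t(x) << 8) | y;  // unpack(y, x): x in the high half
  return uint8_t((Wide << z) >> 8);        // shift left, take the high byte
}
// fshl8(0x12, 0x34, 4) == 0x23: the top 4 bits of y rotate in behind x.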
@@ -29855,6 +29856,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode());
+ // Constant vXi16 funnel shifts can be efficiently handled by default.
+ if (IsCst && EltSizeInBits == 16)
+ return SDValue();
+
unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
unsigned NumElts = VT.getVectorNumElements();
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
@@ -29874,6 +29879,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
+ // Uniform vXi16 funnel shifts can be efficiently handled by default.
+ if (EltSizeInBits == 16)
+ return SDValue();
+
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
@@ -29912,7 +29921,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
}
// Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)
- if ((IsCst && !IsFSHR && EltSizeInBits == 8) ||
+ if (((IsCst || !Subtarget.hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) ||
supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
SDValue Z = DAG.getConstant(0, DL, VT);
SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
@@ -36477,9 +36486,8 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
- SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
- const X86Subtarget &Subtarget, unsigned &Shuffle,
- MVT &SrcVT, MVT &DstVT) {
+ SDValue V1, const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
@@ -36522,9 +36530,6 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
- V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
-
Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
if (SrcVT.getVectorNumElements() != NumDstElts)
Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);
@@ -37102,6 +37107,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
+ SDLoc DL(Root);
MVT RootVT = Root.getSimpleValueType();
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
@@ -37109,6 +37115,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Canonicalize shuffle input op to the requested type.
// TODO: Support cases where Op is smaller than VT.
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
+ if (VT.getSizeInBits() < Op.getValueSizeInBits())
+ Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits());
return DAG.getBitcast(VT, Op);
};
@@ -37124,7 +37132,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
assert(VT1.getSizeInBits() == RootSizeInBits &&
VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch");
- SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
@@ -37393,15 +37400,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- SDValue NewV1 = V1; // Save operand in case early exit happens.
- if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
- DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT) &&
+ if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1,
+ Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
+ Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
return DAG.getBitcast(RootVT, Res);
}
@@ -40903,6 +40908,28 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.One.setHighBits(ShAmt);
return false;
}
+ case X86ISD::BLENDV: {
+ SDValue Sel = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ SDValue NewSel = SimplifyMultipleUseDemandedBits(
+ Sel, SignMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+ LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+ RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
+
+ if (NewSel || NewLHS || NewRHS) {
+ NewSel = NewSel ? NewSel : Sel;
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT,
+ NewSel, NewLHS, NewRHS));
+ }
+ break;
+ }
case X86ISD::PEXTRB:
case X86ISD::PEXTRW: {
SDValue Vec = Op.getOperand(0);
@@ -41043,6 +41070,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ // See if we only demand bits from the lower 128-bit vector.
+ if (SrcVT.is256BitVector() &&
+ OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) {
+ SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
+ }
+
// Only demand the vector elements of the sign bits we need.
APInt KnownUndef, KnownZero;
APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
@@ -42238,19 +42272,14 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
- // For all_of(setcc(x,y,eq))
- // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
- // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
+ // For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()).
if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
ISD::CondCode::SETEQ) {
- SDValue Vec = Match.getOperand(0);
- EVT VecSVT = Vec.getValueType().getScalarType();
- if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) ||
- (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) {
- NumElts *= 2;
- VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext());
- EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts);
+ EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
+ if (VecSVT != MVT::i8) {
+ NumElts *= VecSVT.getSizeInBits() / 8;
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts);
MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
Match = DAG.getSetCC(
DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
@@ -43079,6 +43108,38 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}
+ // If this extract is from a loaded vector value and will be used as an
+ // integer, that requires a potentially expensive XMM -> GPR transfer.
+ // Additionally, if we can convert to a scalar integer load, that will likely
+ // be folded into a subsequent integer op.
+ // Note: Unlike the related fold for this in DAGCombiner, this is not limited
+ // to a single-use of the loaded vector. For the reasons above, we
+ // expect this to be profitable even if it creates an extra load.
+ bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
+ return Use->getOpcode() == ISD::STORE ||
+ Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
+ });
+ auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
+ if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
+ SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
+ !LikelyUsedAsVector) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue NewPtr =
+ TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
+ unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
+ MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
+ Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
+ SDValue Load =
+ DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
+ LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
+ SDValue Chain = Load.getValue(1);
+ SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)};
+ SDValue To[] = {Load, Chain};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ return SDValue(N, 0);
+ }
+
return SDValue();
}
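To make the offset arithmetic in the new fold concrete: extracting lane CIdx of a loaded vector becomes a plain scalar load at byte offset EltBits * CIdx / 8, with the alignment reduced to whatever still divides that offset. A sketch with hypothetical values (not from the patch):

#include "llvm/Support/Alignment.h"

// Lane 3 of a 16-byte-aligned <4 x i32> load becomes an i32 load at +12.
unsigned EltBits = 32, Lane = 3;
unsigned PtrOff = EltBits * Lane / 8;                              // 12
llvm::Align NewAlign = llvm::commonAlignment(llvm::Align(16), 12); // Align(4)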
@@ -44467,8 +44528,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
unsigned NumEltBits = VecVT.getScalarSizeInBits();
bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero();
- bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
- CmpVal.isMask(NumElts);
+ bool IsAllOf = (CmpOpcode == X86ISD::SUB || CmpOpcode == X86ISD::CMP) &&
+ NumElts <= CmpBits && CmpVal.isMask(NumElts);
if (!IsAnyOf && !IsAllOf)
return SDValue();
@@ -44500,14 +44561,16 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
// MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
- if (VecVT.is256BitVector()) {
+ if (VecVT.is256BitVector() && NumElts <= CmpBits) {
SmallVector<SDValue> Ops;
if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
Ops.size() == 2) {
SDLoc DL(EFLAGS);
- EVT SubVT = Ops[0].getValueType();
+ EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
- SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+ SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
+ DAG.getBitcast(SubVT, Ops[0]),
+ DAG.getBitcast(SubVT, Ops[1]));
V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
@@ -44522,26 +44585,29 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (IsAllOf && Subtarget.hasSSE41()) {
MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
- if (BC.getOpcode() == X86ISD::PCMPEQ) {
- SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
- BC.getOperand(0), BC.getOperand(1));
- V = DAG.getBitcast(TestVT, V);
- return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
- }
- // Check for 256-bit split vector cases.
- if (BC.getOpcode() == ISD::AND &&
- BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
- BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
- SDValue LHS = BC.getOperand(0);
- SDValue RHS = BC.getOperand(1);
- LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
- LHS.getOperand(0), LHS.getOperand(1));
- RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
- RHS.getOperand(0), RHS.getOperand(1));
- LHS = DAG.getBitcast(TestVT, LHS);
- RHS = DAG.getBitcast(TestVT, RHS);
- SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
- return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ // Ensure MOVMSK was testing every signbit of BC.
+ if (BC.getValueType().getVectorNumElements() <= NumElts) {
+ if (BC.getOpcode() == X86ISD::PCMPEQ) {
+ SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+ BC.getOperand(0), BC.getOperand(1));
+ V = DAG.getBitcast(TestVT, V);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
+ // Check for 256-bit split vector cases.
+ if (BC.getOpcode() == ISD::AND &&
+ BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
+ BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
+ SDValue LHS = BC.getOperand(0);
+ SDValue RHS = BC.getOperand(1);
+ LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+ LHS.getOperand(0), LHS.getOperand(1));
+ RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+ RHS.getOperand(0), RHS.getOperand(1));
+ LHS = DAG.getBitcast(TestVT, LHS);
+ RHS = DAG.getBitcast(TestVT, RHS);
+ SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
}
}
@@ -44575,7 +44641,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
SDLoc DL(EFLAGS);
SDValue Result = peekThroughBitcasts(Src);
- if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+ if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ &&
+ Result.getValueType().getVectorNumElements() <= NumElts) {
SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
Result.getOperand(0), Result.getOperand(1));
V = DAG.getBitcast(MVT::v4i64, V);
@@ -46840,14 +46907,18 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
return false;
+ APInt DemandedBits = APInt::getZero(EltSizeInBits);
APInt DemandedElts = APInt::getZero(NumElts);
for (int I = 0; I != NumElts; ++I)
- if (!EltBits[I].isZero())
+ if (!EltBits[I].isZero()) {
+ DemandedBits |= EltBits[I];
DemandedElts.setBit(I);
+ }
APInt KnownUndef, KnownZero;
return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
- KnownZero, DCI);
+ KnownZero, DCI) ||
+ TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI);
};
if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
if (N->getOpcode() != ISD::DELETED_NODE)
@@ -49031,8 +49102,13 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
return SDValue();
// SSSE3's pshufb results in less instructions in the cases below.
- if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)
- return SDValue();
+ if (Subtarget.hasSSSE3() && NumElems == 8) {
+ if (InSVT == MVT::i16)
+ return SDValue();
+ if (InSVT == MVT::i32 &&
+ (OutSVT == MVT::i8 || !Subtarget.hasSSE41() || Subtarget.hasInt256()))
+ return SDValue();
+ }
SDLoc DL(N);
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
@@ -51110,6 +51186,30 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
+ // iff pow2splat(c1).
+ if (Src.getOpcode() == X86ISD::PCMPEQ &&
+ Src.getOperand(0).getOpcode() == ISD::AND &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
+ SDValue LHS = Src.getOperand(0).getOperand(0);
+ SDValue RHS = Src.getOperand(0).getOperand(1);
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+ if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) {
+ SDLoc DL(N);
+ MVT ShiftVT = SrcVT;
+ if (ShiftVT.getScalarType() == MVT::i8) {
+ // vXi8 shifts - we only care about the signbit so can use PSLLW.
+ ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ LHS = DAG.getBitcast(ShiftVT, LHS);
+ }
+ unsigned ShiftAmt = KnownRHS.getConstant().countLeadingZeros();
+ LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS,
+ ShiftAmt, DAG);
+ LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT);
+ return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS);
+ }
+ }
+
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(NumBits));
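The new MOVMSK fold is easier to follow with one lane of concrete numbers: when c1 is a splat of a single power of two, testing and(x, c1) == 0 does not need a compare, because shifting that bit into the sign position and inverting produces the same per-lane mask bit. A scalar sketch (hypothetical helper, not from the patch):

#include <cstdint>

// Equivalent of one lane of movmsk(pcmpeq(and(x, 1 << k), 0)):
// shift bit k into the sign bit, invert, read the sign bit.
static bool laneBitClear(uint32_t x, unsigned k) {
  unsigned ShAmt = 31 - k;        // countLeadingZeros(1u << k) for 32 bits
  uint32_t Shifted = x << ShAmt;  // bit k now sits in the sign position
  return (~Shifted) >> 31;        // 1 iff bit k of x was 0
}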
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 7368b64efd9a..6206d8efb3d0 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -61,6 +61,8 @@
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+
using namespace llvm;
using namespace PatternMatch;
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 92acfb93057a..9c16d3750998 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -23,6 +23,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index ce3c5153bde2..e6a542385662 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
@@ -365,26 +366,25 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin();
- I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Function::arg_iterator I2 = NF->arg_begin();
+ for (Argument &Arg : F->args()) {
+ if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
+ Arg.replaceAllUsesWith(&*I2);
+ I2->takeName(&Arg);
++I2;
continue;
}
- if (ByValArgsToTransform.count(&*I)) {
+ if (ByValArgsToTransform.count(&Arg)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- Align StructAlign = *I->getParamAlign();
+ Type *AgTy = Arg.getParamByValType();
+ Align StructAlign = *Arg.getParamAlign();
Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
StructAlign, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
@@ -397,41 +397,41 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Value *Idx = GetElementPtrInst::Create(
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
- I2->setName(I->getName() + "." + Twine(i));
+ I2->setName(Arg.getName() + "." + Twine(i));
Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
}
// Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+ Arg.replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&Arg);
continue;
}
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
- [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+ [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
- if (I->use_empty())
+ if (Arg.use_empty())
continue;
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ while (!Arg.use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
- I2->setName(I->getName() + ".val");
+ I2->setName(Arg.getName() + ".val");
LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
<< "' in function '" << F->getName() << "'\n");
} else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
assert(!GEP->use_empty() &&
"GEPs without uses should be cleaned up already");
IndicesVector Operands;
@@ -449,7 +449,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(It != ArgIndices.end() && "GEP not handled??");
}
- TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(),
+ TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
make_range(Operands.begin(), Operands.end())));
LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
@@ -610,12 +610,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
};
- // First, iterate the entry block and mark loads of (geps of) arguments as
- // safe.
+ // First, iterate instructions that are guaranteed to execute on function
+ // entry and mark loads of (geps of) arguments as safe.
BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (Instruction &I : EntryBlock)
+ for (Instruction &I : EntryBlock) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
@@ -649,6 +649,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
}
}
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
SmallVector<LoadInst *, 16> Loads;
@@ -830,7 +834,10 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
-bool ArgumentPromotionPass::areFunctionArgsABICompatible(
+/// Check if callers and the callee \p F agree on how promoted arguments would
+/// be passed. Arguments they do not agree on are removed from the sets, but
+/// the return value has to be checked as well.
+static bool areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
@@ -1003,7 +1010,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
- if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+ if (!areFunctionArgsABICompatible(
*F, TTI, ArgsToPromote, ByValArgsToTransform))
return nullptr;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 12b8a0ef9d00..d66140a726f6 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -183,6 +183,31 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
}
///}
+bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA) {
+ // We are looking for volatile instructions or non-relaxed atomics.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ // Non-convergent and readnone imply nosync.
+ if (!CB->isConvergent() && !CB->mayReadOrWriteMemory())
+ return true;
+
+ if (AANoSync::isNoSyncIntrinsic(&I))
+ return true;
+
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
+ return NoSyncAA.isAssumedNoSync();
+ }
+
+ if (!I.mayReadOrWriteMemory())
+ return true;
+
+ return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I);
+}
+
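
The new AA::isNoSyncInst above is a decision ladder. A hedged stand-alone restatement of its call-site part (plain C++; the struct fields are hypothetical stand-ins for the corresponding IR queries):

    struct CallFacts {
      bool HasNoSyncAttr;     // Callee or call site carries `nosync`.
      bool IsNoSyncIntrinsic; // E.g. a non-volatile memory intrinsic.
      bool IsConvergent;
      bool TouchesMemory;     // mayReadOrWriteMemory()
    };

    bool callIsNoSync(const CallFacts &C) {
      if (C.HasNoSyncAttr || C.IsNoSyncIntrinsic)
        return true;
      // A call that is neither convergent nor touches memory has no way
      // to synchronize with other threads.
      return !C.IsConvergent && !C.TouchesMemory;
    }
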
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V) {
if (auto *C = dyn_cast<Constant>(&V))
@@ -370,6 +395,162 @@ bool AA::getPotentialCopiesOfStoredValue(
return true;
}
+static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA,
+ bool RequireReadNone, bool &IsKnown) {
+
+ IRPosition::Kind Kind = IRP.getPositionKind();
+ if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
+ const auto &MemLocAA =
+ A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemLocAA.isAssumedReadNone()) {
+ IsKnown = MemLocAA.isKnownReadNone();
+ if (!IsKnown)
+ A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+ }
+
+ const auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemBehaviorAA.isAssumedReadNone() ||
+ (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
+ IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
+ : MemBehaviorAA.isKnownReadOnly();
+ if (!IsKnown)
+ A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+
+ return false;
+}
+
+bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ false, IsKnown);
+}
+bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ true, IsKnown);
+}
+
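
Usage sketch for the two helpers above (a fragment, assuming an Attributor `A`, a position `IRP`, and a querying attribute `QueryingAA` are in scope, as in any updateImpl):

    bool IsKnown = false;
    if (AA::isAssumedReadOnly(A, IRP, QueryingAA, IsKnown)) {
      // Readonly holds at least optimistically. When IsKnown is false an
      // OPTIONAL dependence was recorded, so QueryingAA is re-run if the
      // assumption is later invalidated.
      if (IsKnown) {
        // A known fact: safe to act on immediately.
      }
    }
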
+static bool
+isPotentiallyReachable(Attributor &A, const Instruction &FromI,
+ const Instruction *ToI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName()
+ << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
+ << "]\n");
+
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *> Worklist;
+ Worklist.push_back(&FromI);
+
+ while (!Worklist.empty()) {
+ const Instruction *CurFromI = Worklist.pop_back_val();
+ if (!Visited.insert(CurFromI).second)
+ continue;
+
+ const Function *FromFn = CurFromI->getFunction();
+ if (FromFn == &ToFn) {
+ if (!ToI)
+ return true;
+ LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
+ << " intraprocedurally\n");
+ const auto &ReachabilityAA = A.getAAFor<AAReachability>(
+ QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
+ bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
+ << (Result ? "can potentially " : "cannot ") << "reach "
+ << *ToI << " [Intra]\n");
+ if (Result)
+ return true;
+ continue;
+ }
+
+ // TODO: If we can go arbitrarily backwards we will eventually reach an
+ // entry point that can reach ToI. Only once this takes a set of blocks
+ // through which we cannot go, or once we track internal functions not
+ // accessible from the outside, does it make sense to perform backwards
+ // analysis in the absence of a GoBackwardsCB.
+ if (!GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
+ << *CurFromI << " is not checked backwards, abort\n");
+ return true;
+ }
+
+ // Check if the current instruction is already known to reach the ToFn.
+ const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
+ QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
+ bool Result = FnReachabilityAA.instructionCanReach(
+ A, *CurFromI, ToFn, /* UseBackwards */ false);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
+ << " " << (Result ? "can potentially " : "cannot ")
+ << "reach @" << ToFn.getName() << " [FromFn]\n");
+ if (Result)
+ return true;
+
+ // If we do not go backwards from the FromFn we are done here and so far we
+ // could not find a way to reach ToFn/ToI.
+ if (!GoBackwardsCB(*FromFn))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @"
+ << FromFn->getName() << "\n");
+
+ auto CheckCallSite = [&](AbstractCallSite ACS) {
+ CallBase *CB = ACS.getInstruction();
+ if (!CB)
+ return false;
+
+ if (isa<InvokeInst>(CB))
+ return false;
+
+ Instruction *Inst = CB->getNextNonDebugInstruction();
+ Worklist.push_back(Inst);
+ return true;
+ };
+
+ bool AllCallSitesKnown;
+ Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
+ /* RequireAllCallSites */ true,
+ &QueryingAA, AllCallSitesKnown);
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " failed, give up\n");
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " worklist size is: " << Worklist.size() << "\n");
+ }
+ return false;
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Instruction &ToI,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from "
+ << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n");
+ const Function *ToFn = ToI.getFunction();
+ return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
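
Once the AA queries are abstracted away, the traversal above has a simple shape. A self-contained sketch (standard C++, not the Attributor API) of the forward walk that steps back to call sites when the callback allows it:

    #include <set>
    #include <vector>

    struct Node {
      int Fn;                              // Function this point lives in.
      std::vector<Node *> CallSiteResumes; // Points after each call site.
    };

    bool potentiallyReaches(Node *From, int ToFn, bool (*GoBackwards)(int)) {
      std::set<Node *> Visited;
      std::vector<Node *> Worklist{From};
      while (!Worklist.empty()) {
        Node *Cur = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(Cur).second)
          continue;
        if (Cur->Fn == ToFn)
          return true; // Same function: assume intraprocedural reachability.
        if (!GoBackwards)
          return true; // No backwards info: conservatively reachable.
        if (!GoBackwards(Cur->Fn))
          continue;    // Not allowed to leave this function.
        for (Node *Resume : Cur->CallSiteResumes)
          Worklist.push_back(Resume); // Continue after each call site.
      }
      return false;
    }
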
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
@@ -704,9 +885,8 @@ void IRPosition::verify() {
"Expected a nullptr for an invalid position!");
return;
case IRP_FLOAT:
- assert((!isa<CallBase>(&getAssociatedValue()) &&
- !isa<Argument>(&getAssociatedValue())) &&
- "Expected specialized kind for call base and argument values!");
+ assert((!isa<Argument>(&getAssociatedValue())) &&
+ "Expected specialized kind for argument values!");
return;
case IRP_RETURNED:
assert(isa<Function>(getAsValuePtr()) &&
@@ -900,7 +1080,7 @@ bool Attributor::isAssumedDead(const Use &U,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
- return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
+ return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
@@ -923,7 +1103,8 @@ bool Attributor::isAssumedDead(const Instruction &I,
// If we have a context instruction and a liveness AA we use it.
if (FnLivenessAA &&
FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
- FnLivenessAA->isAssumedDead(&I)) {
+ (CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent())
+ : FnLivenessAA->isAssumedDead(&I))) {
if (QueryingAA)
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
if (!FnLivenessAA->isKnownDead(&I))
@@ -934,8 +1115,9 @@ bool Attributor::isAssumedDead(const Instruction &I,
if (CheckBBLivenessOnly)
return false;
- const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>(
- IRPosition::value(I, CBCtx), QueryingAA, DepClassTy::NONE);
+ const IRPosition IRP = IRPosition::inst(I, CBCtx);
+ const AAIsDead &IsDeadAA =
+ getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't check liveness for AAIsDead.
if (QueryingAA == &IsDeadAA)
return false;
@@ -1035,8 +1217,14 @@ bool Attributor::checkForAllUses(
const Use *U = Worklist.pop_back_val();
if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
- << *U->getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U->getUser()))
+ dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName()
+ << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
CheckBBLivenessOnly, LivenessDepClass)) {
@@ -1126,8 +1314,14 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
- << *U.getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U))
+ dbgs() << "[Attributor] Check use: " << Fn->getName() << " in "
+ << *U.getUser() << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
@@ -1268,9 +1462,12 @@ static bool checkForAllInstructionsImpl(
for (Instruction *I : *Insts) {
// Skip dead instructions.
if (A && !CheckPotentiallyDead &&
- A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
- UsedAssumedInformation, CheckBBLivenessOnly))
+ A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA,
+ UsedAssumedInformation, CheckBBLivenessOnly)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I
+ << " is potentially dead, skip!\n";);
continue;
+ }
if (!Pred(*I))
return false;
@@ -1329,7 +1526,7 @@ bool Attributor::checkForAllReadWriteInstructions(
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
- if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA,
+ if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
UsedAssumedInformation))
continue;
@@ -1381,9 +1578,11 @@ void Attributor::runTillFixpoint() {
InvalidAA->Deps.pop_back();
AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+ LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA);
Worklist.insert(DepAA);
continue;
}
+ LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA);
DepAA->getState().indicatePessimisticFixpoint();
assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
if (!DepAA->getState().isValidState())
@@ -1433,6 +1632,9 @@ void Attributor::runTillFixpoint() {
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+ Worklist.insert(QueryAAsAwaitingUpdate.begin(),
+ QueryAAsAwaitingUpdate.end());
+ QueryAAsAwaitingUpdate.clear();
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
@@ -1492,6 +1694,12 @@ void Attributor::runTillFixpoint() {
}
}
+void Attributor::registerForUpdate(AbstractAttribute &AA) {
+ assert(AA.isQueryAA() &&
+ "Non-query AAs should not be required to register for updates!");
+ QueryAAsAwaitingUpdate.insert(&AA);
+}
+
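
registerForUpdate feeds into the fixpoint driver above: query AAs are re-queued wholesale each round, presumably because their dependences are discovered lazily rather than up front. A tiny model of that scheduling (standard C++, integers standing in for attributes):

    #include <set>

    struct Scheduler {
      std::set<int> Worklist, QueryAAsAwaitingUpdate;

      void registerForUpdate(int AA) { QueryAAsAwaitingUpdate.insert(AA); }

      // Refill for one fixpoint round: changed AAs are re-queued via
      // dependence tracking, query AAs re-queue themselves explicitly.
      void refill(const std::set<int> &ChangedAAs) {
        Worklist.clear();
        Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
        Worklist.insert(QueryAAsAwaitingUpdate.begin(),
                        QueryAAsAwaitingUpdate.end());
        QueryAAsAwaitingUpdate.clear();
      }
    };
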
ChangeStatus Attributor::manifestAttributes() {
TimeTraceScope TimeScope("Attributor::manifestAttributes");
size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
@@ -1792,7 +2000,7 @@ ChangeStatus Attributor::cleanupIR() {
// Actually we do not delete the blocks but squash them into a single
// unreachable but untangling branches that jump here is something we need
// to do in a more generic way.
- DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+ detachDeadBlocks(ToBeDeletedBBs, nullptr);
}
identifyDeadInternalFunctions();
@@ -1897,7 +2105,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
/* CheckBBLivenessOnly */ true))
CS = AA.update(*this);
- if (DV.empty()) {
+ if (!AA.isQueryAA() && DV.empty()) {
// If the attribute did not query any non-fix information, the state
// will not change and we can indicate that right away.
AAState.indicateOptimisticFixpoint();
@@ -2601,12 +2809,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
- IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBInstPos = IRPosition::inst(CB);
IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
- getOrCreateAAFor<AAIsDead>(CBRetPos);
+ getOrCreateAAFor<AAIsDead>(CBInstPos);
Function *Callee = CB.getCalledFunction();
// TODO: Even if the callee is not known now we might be able to simplify
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 76420783b2d1..2d88e329e093 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +69,12 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
+static cl::opt<unsigned>
+ MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
+ cl::desc("Maximum number of interfering writes to "
+ "check before assuming all might interfere."),
+ cl::init(6));
+
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
@@ -244,6 +251,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
+/// If \p Intraprocedural is set to true only values valid in the scope of
+/// \p CtxI will be visited and simplification into other scopes is prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
@@ -251,7 +260,8 @@ static bool genericValueTraversal(
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
- function_ref<Value *(Value *)> StripCB = nullptr) {
+ function_ref<Value *(Value *)> StripCB = nullptr,
+ bool Intraprocedural = false) {
const AAIsDead *LivenessAA = nullptr;
if (IRP.getAnchorScope())
@@ -281,8 +291,11 @@ static bool genericValueTraversal(
continue;
// Make sure we limit the compile time for complex expressions.
- if (Iteration++ >= MaxValues)
+ if (Iteration++ >= MaxValues) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
+ << Iteration << "!\n");
return false;
+ }
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer as stripPointerCasts only works on them.
@@ -331,10 +344,7 @@ static bool genericValueTraversal(
"Expected liveness in the presence of instructions!");
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- bool UsedAssumedInformation = false;
- if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
- LivenessAA, UsedAssumedInformation,
- /* CheckBBLivenessOnly */ true)) {
+ if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
AnyDead = true;
continue;
}
@@ -344,24 +354,49 @@ static bool genericValueTraversal(
continue;
}
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+ SmallVector<Item> CallSiteValues;
+ bool AllCallSitesKnown = true;
+ if (A.checkForAllCallSites(
+ [&](AbstractCallSite ACS) {
+ // Callbacks might not have a corresponding call site operand;
+ // stick with the argument in that case.
+ Value *CSOp = ACS.getCallArgOperand(*Arg);
+ if (!CSOp)
+ return false;
+ CallSiteValues.push_back({CSOp, ACS.getInstruction()});
+ return true;
+ },
+ *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ Worklist.append(CallSiteValues);
+ continue;
+ }
+ }
+ }
+
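
The new Argument case above translates a value in the callee into the values passed at each call site. A hedged sketch of that translation in isolation (assumes the usual LLVM IR headers and `using namespace llvm`; direct calls only, no callback handling):

    static bool collectCallSiteValues(Argument &Arg,
                                      SmallVectorImpl<Value *> &Out) {
      Function &F = *Arg.getParent();
      for (const Use &U : F.uses()) {
        auto *CB = dyn_cast<CallBase>(U.getUser());
        if (!CB || !CB->isCallee(&U))
          return false; // Unknown or indirect use: keep the Argument itself.
        Out.push_back(CB->getArgOperand(Arg.getArgNo()));
      }
      return true;
    }
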
if (UseValueSimplify && !isa<Constant>(V)) {
bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV.hasValue())
continue;
- if (!SimpleV.getValue())
- return false;
Value *NewV = SimpleV.getValue();
- if (NewV != V) {
- Worklist.push_back({NewV, CtxI});
- continue;
+ if (NewV && NewV != V) {
+ if (!Intraprocedural || !CtxI ||
+ AA::isValidInScope(*NewV, CtxI->getFunction())) {
+ Worklist.push_back({NewV, CtxI});
+ continue;
+ }
}
}
// Once a leaf is reached we inform the user through the callback.
- if (!VisitValueCB(*V, CtxI, State, Iteration > 1))
+ if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
+ << *V << "!\n");
return false;
+ }
} while (!Worklist.empty());
// If we actually used liveness information we have to record a dependence.
@@ -375,7 +410,8 @@ static bool genericValueTraversal(
bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
- const Instruction *CtxI) {
+ const Instruction *CtxI,
+ bool Intraprocedural) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -387,7 +423,7 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB))
+ true, 32, StripCB, Intraprocedural))
return false;
return true;
}
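
Usage sketch for the extended interface above (a fragment; `A`, `QueryingAA`, a pointer value `Ptr`, and a context instruction `I` are assumed in scope). With the new flag set, only objects valid in the scope of the context instruction are reported:

    SmallVector<Value *, 8> Objects;
    if (AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
                                        /* Intraprocedural */ true)) {
      for (Value *Obj : Objects) {
        // Each Obj is an assumed underlying object of Ptr, and none of
        // them stems from a caller's scope.
      }
    }
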
@@ -620,7 +656,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
if (!AssociatedFunction)
return S.indicatePessimisticFixpoint();
- CallBase &CBContext = static_cast<CallBase &>(this->getAnchorValue());
+ CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
if (IntroduceCallBaseContext)
LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
<< CBContext << "\n");
@@ -1026,7 +1062,6 @@ private:
BooleanState BS;
};
-namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1058,6 +1093,165 @@ struct AAPointerInfoImpl
const override {
return State::forallInterferingAccesses(SI, CB);
}
+ bool forallInterferingWrites(
+ Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ function_ref<bool(const Access &, bool)> UserCB) const override {
+ SmallPtrSet<const Access *, 8> DominatingWrites;
+ SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+
+ Function &Scope = *LI.getFunction();
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+ const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ const bool NoSync = NoSyncAA.isAssumedNoSync();
+
+ // Helper to determine if we need to consider threading, which we cannot
+ // handle right now. However, if the function is (assumed) nosync or all
+ // instructions are executed by the initial thread only, we can ignore
+ // threading.
+ auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
+ if (NoSync)
+ return true;
+ if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ return true;
+ return false;
+ };
+
+ // Helper to determine if the access is executed by the same thread as the
+ // load; for now it is sufficient to avoid any potential threading effects
+ // as we cannot deal with them anyway.
+ auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
+ return CanIgnoreThreading(*Acc.getLocalInst());
+ };
+
+ // TODO: Use inter-procedural reachability and dominance.
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ QueryingAA, IRPosition::function(*LI.getFunction()),
+ DepClassTy::OPTIONAL);
+
+ const bool CanUseCFGReasoning = CanIgnoreThreading(LI);
+ InformationCache &InfoCache = A.getInfoCache();
+ const DominatorTree *DT =
+ NoRecurseAA.isKnownNoRecurse()
+ ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ Scope)
+ : nullptr;
+
+ enum GPUAddressSpace : unsigned {
+ Generic = 0,
+ Global = 1,
+ Shared = 3,
+ Constant = 4,
+ Local = 5,
+ };
+
+ // Helper to check if a value has "kernel lifetime", that is, it will not
+ // outlive a GPU kernel. This is true for shared, constant, and local
+ // globals on AMD and NVIDIA GPUs.
+ auto HasKernelLifetime = [&](Value *V, Module &M) {
+ Triple T(M.getTargetTriple());
+ if (!(T.isAMDGPU() || T.isNVPTX()))
+ return false;
+ switch (V->getType()->getPointerAddressSpace()) {
+ case GPUAddressSpace::Shared:
+ case GPUAddressSpace::Constant:
+ case GPUAddressSpace::Local:
+ return true;
+ default:
+ return false;
+ };
+ };
+
+ // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query
+ // to determine if we should look at reachability from the callee. For
+ // certain pointers we know the lifetime and we do not have to step into the
+ // callee to determine reachability as the pointer would be dead in the
+ // callee. See the conditional initialization below.
+ std::function<bool(const Function &)> IsLiveInCalleeCB;
+
+ if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) {
+ // If the alloca containing function is not recursive the alloca
+ // must be dead in the callee.
+ const Function *AIFn = AI->getFunction();
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
+ if (NoRecurseAA.isAssumedNoRecurse()) {
+ IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
+ }
+ } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
+ // If the global has kernel lifetime we can stop if we reach a kernel
+ // as it is "dead" in the (unknown) callees.
+ if (HasKernelLifetime(GV, *GV->getParent()))
+ IsLiveInCalleeCB = [](const Function &Fn) {
+ return !Fn.hasFnAttribute("kernel");
+ };
+ }
+
+ auto AccessCB = [&](const Access &Acc, bool Exact) {
+ if (!Acc.isWrite())
+ return true;
+
+ // For now we only filter accesses based on CFG reasoning, which does not
+ // work yet if we have threading effects, or the access is complicated.
+ if (CanUseCFGReasoning) {
+ if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
+ IsLiveInCalleeCB))
+ return true;
+ if (DT && Exact &&
+ (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+ IsSameThreadAsLoad(Acc)) {
+ if (DT->dominates(Acc.getLocalInst(), &LI))
+ DominatingWrites.insert(&Acc);
+ }
+ }
+
+ InterferingWrites.push_back({&Acc, Exact});
+ return true;
+ };
+ if (!State::forallInterferingAccesses(LI, AccessCB))
+ return false;
+
+ // If we cannot use CFG reasoning, only the non-write accesses have been
+ // filtered out and we are done here.
+ if (!CanUseCFGReasoning) {
+ for (auto &It : InterferingWrites)
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
+
+ // Helper to determine if we can skip a specific write access. This is in
+ // the worst case quadratic as we are looking for another write that will
+ // hide the effect of this one.
+ auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if (!IsSameThreadAsLoad(Acc))
+ return false;
+ if (!DominatingWrites.count(&Acc))
+ return false;
+ for (const Access *DomAcc : DominatingWrites) {
+ assert(Acc.getLocalInst()->getFunction() ==
+ DomAcc->getLocalInst()->getFunction() &&
+ "Expected dominating writes to be in the same function!");
+
+ if (DomAcc != &Acc &&
+ DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Run the user callback on all writes we cannot skip and return if that
+ // succeeded for all or not.
+ unsigned NumInterferingWrites = InterferingWrites.size();
+ for (auto &It : InterferingWrites)
+ if (!DT || NumInterferingWrites > MaxInterferingWrites ||
+ !CanSkipAccess(*It.first, It.second))
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
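
The skipping logic above hinges on one observation: a dominating write W is invisible to the load if another write that also dominates the load is itself dominated by W, i.e. it executes between W and the load and overwrites W's effect. A self-contained sketch of just that filter (standard C++; `Dominates` stands in for DominatorTree::dominates):

    #include <set>

    struct Write { int Id; };

    bool canSkip(const Write *W, const std::set<const Write *> &DomWrites,
                 bool (*Dominates)(const Write *, const Write *)) {
      if (!DomWrites.count(W))
        return false; // Only writes dominating the load can be shadowed.
      for (const Write *D : DomWrites)
        if (D != W && Dominates(W, D))
          return true; // D overwrites W's effect before the load executes.
      return false;
    }
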
ChangeStatus translateAndAddCalleeState(Attributor &A,
const AAPointerInfo &CalleeAA,
@@ -1200,9 +1394,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
<< " : " << *Idx << "\n");
return false;
}
- UsrOI.Offset = PtrOI.Offset +
- DL.getIndexedOffsetInType(
- GEP->getSourceElementType(), Indices);
+ UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType(
+ GEP->getSourceElementType(), Indices);
Follow = true;
return true;
}
@@ -1693,17 +1886,9 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
bool) -> bool {
- bool UsedAssumedInformation = false;
- Optional<Value *> SimpleRetVal =
- A.getAssumedSimplified(V, *this, UsedAssumedInformation);
- if (!SimpleRetVal.hasValue())
- return true;
- if (!SimpleRetVal.getValue())
- return false;
- Value *RetVal = *SimpleRetVal;
- assert(AA::isValidInScope(*RetVal, Ret.getFunction()) &&
+ assert(AA::isValidInScope(V, Ret.getFunction()) &&
"Assumed returned value should be valid in function scope!");
- if (ReturnedValues[RetVal].insert(&Ret))
+ if (ReturnedValues[&V].insert(&Ret))
Changed = ChangeStatus::CHANGED;
return true;
};
@@ -1712,7 +1897,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I);
+ &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
+ /* StripCB */ nullptr, /* Intraprocedural */ true);
};
// Discover returned values from all live returned instructions in the
@@ -1767,24 +1953,16 @@ struct AANoSyncImpl : AANoSync {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
-
- /// Helper function used to determine whether an instruction is non-relaxed
- /// atomic. In other words, if an atomic instruction does not have unordered
- /// or monotonic ordering
- static bool isNonRelaxedAtomic(Instruction *I);
-
- /// Helper function specific for intrinsics which are potentially volatile
- static bool isNoSyncIntrinsic(Instruction *I);
};
-bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
if (auto *FI = dyn_cast<FenceInst>(I))
// All legal orderings for fence are stronger than monotonic.
return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
// Unordered is not a legal ordering for cmpxchg.
return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic ||
AI->getFailureOrdering() != AtomicOrdering::Monotonic);
@@ -1813,7 +1991,7 @@ bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
/// Return true if this intrinsic is nosync. This is only used for intrinsics
/// which would be nosync except that they have a volatile flag. All other
/// intrinsics are simply annotated with the nosync attribute in Intrinsics.td.
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
if (auto *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
@@ -1822,24 +2000,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
-
- if (const auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->hasFnAttr(Attribute::NoSync))
- return true;
-
- if (isNoSyncIntrinsic(&I))
- return true;
-
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
- return NoSyncAA.isAssumedNoSync();
- }
-
- if (!I.isVolatile() && !isNonRelaxedAtomic(&I))
- return true;
-
- return false;
+ return AA::isNoSyncInst(A, I, *this);
};
auto CheckForNoSync = [&](Instruction &I) {
@@ -2327,16 +2488,6 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- // TODO: We should build a call graph ourselves to enable this in the module
- // pass as well.
- if (const Function *F = getAnchorScope())
- if (A.getInfoCache().getSccSize(*F) != 1)
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
@@ -2359,27 +2510,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
return ChangeStatus::UNCHANGED;
}
- // If the above check does not hold anymore we look at the calls.
- auto CheckForNoRecurse = [&](Instruction &I) {
- const auto &CB = cast<CallBase>(I);
- if (CB.hasFnAttr(Attribute::NoRecurse))
- return true;
-
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- if (!NoRecurseAA.isAssumedNoRecurse())
- return false;
-
- // Recursion to the same function
- if (CB.getCalledFunction() == getAnchorScope())
- return false;
-
- return true;
- };
-
- bool UsedAssumedInformation = false;
- if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this,
- UsedAssumedInformation))
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(*this, getIRPosition(),
+ DepClassTy::REQUIRED);
+ if (EdgeReachability.canReach(A, *getAnchorScope()))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2798,16 +2932,10 @@ struct AAWillReturnImpl : public AAWillReturn {
(!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
return false;
- const auto &MemAA =
- A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
- if (!MemAA.isAssumedReadOnly())
- return false;
- if (KnownOnly && !MemAA.isKnownReadOnly())
- return false;
- if (!MemAA.isKnownReadOnly())
- A.recordDependence(MemAA, *this, DepClassTy::OPTIONAL);
-
- return true;
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
+ return IsKnown || !KnownOnly;
+ return false;
}
/// See AbstractAttribute::updateImpl(...).
@@ -2904,6 +3032,10 @@ struct AAReachabilityImpl : AAReachability {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
@@ -3008,9 +3140,8 @@ struct AANoAliasArgument final
return Base::updateImpl(A);
// If the argument is read-only, no-alias cannot break synchronization.
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
- *this, getIRPosition(), DepClassTy::OPTIONAL);
- if (MemBehaviorAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return Base::updateImpl(A);
// If the argument is never passed through callbacks, no-alias cannot break
@@ -3366,14 +3497,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
if (!NoUnwindAA.isKnownNoUnwind())
A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
- const auto &MemBehaviorAA =
- A.getAndUpdateAAFor<AAMemoryBehavior>(*this, CallIRP, DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadOnly()) {
- if (!MemBehaviorAA.isKnownReadOnly())
- A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
- return true;
- }
- return false;
+ bool IsKnown;
+ return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
}
};
@@ -3699,6 +3824,7 @@ struct AAIsDeadFunction : public AAIsDead {
if (!AssumedLiveBlocks.count(&BB)) {
A.deleteAfterManifest(BB);
++BUILD_STAT_NAME(AAIsDead, BasicBlock);
+ HasChanged = ChangeStatus::CHANGED;
}
return HasChanged;
@@ -3708,7 +3834,7 @@ struct AAIsDeadFunction : public AAIsDead {
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
- return !AssumedLiveEdges.count(std::make_pair(From, To));
+ return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
/// See AbstractAttribute::trackStatistics()
@@ -4921,14 +5047,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
AANoCapture::StateType T;
// Readonly means we cannot capture through memory.
- const auto &FnMemAA =
- A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::NONE);
- if (FnMemAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) {
T.addKnownBits(NOT_CAPTURED_IN_MEM);
- if (FnMemAA.isKnownReadOnly())
+ if (IsKnown)
addKnownBits(NOT_CAPTURED_IN_MEM);
- else
- A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL);
}
// Make sure all returned values are different than the underlying value.
@@ -5085,7 +5208,6 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
-} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5106,7 +5228,6 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
-namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -5266,8 +5387,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
- if (!Acc.isWrite())
- return true;
if (Acc.isWrittenValueYetUndetermined())
return true;
Value *Content = Acc.getWrittenValue();
@@ -5287,7 +5406,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
DepClassTy::REQUIRED);
- if (!PI.forallInterferingAccesses(L, CheckAccess))
+ if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
return false;
}
return true;
@@ -5325,9 +5444,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
if (Arg->hasByValAttr()) {
// TODO: We probably need to verify synchronization is not an issue, e.g.,
// there is no race by not copying a constant byval.
- const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
- DepClassTy::REQUIRED);
- if (!MemAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return indicatePessimisticFixpoint();
}
@@ -6827,9 +6945,8 @@ struct AAPrivatizablePtrCallSiteArgument final
return indicatePessimisticFixpoint();
}
- const auto &MemBehaviorAA =
- A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED);
- if (!MemBehaviorAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
return indicatePessimisticFixpoint();
}
@@ -7378,7 +7495,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
-} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7412,7 +7528,6 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
return S;
}
-namespace {
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7657,7 +7772,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
+ /* Intraprocedural */ true)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -9411,7 +9527,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
}
};
- CallBase *CB = static_cast<CallBase *>(getCtxI());
+ CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
setHasUnknownCallee(false, Change);
@@ -9450,7 +9566,7 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
+ CallBase &CB = cast<CallBase>(Inst);
auto &CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9481,11 +9597,39 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
struct AAFunctionReachabilityFunction : public AAFunctionReachability {
private:
struct QuerySet {
- void markReachable(Function *Fn) {
- Reachable.insert(Fn);
- Unreachable.erase(Fn);
+ void markReachable(const Function &Fn) {
+ Reachable.insert(&Fn);
+ Unreachable.erase(&Fn);
}
+ /// If there is no information about the function None is returned.
+ Optional<bool> isCachedReachable(const Function &Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
+
+ if (Reachable.count(&Fn))
+ return true;
+
+ if (Unreachable.count(&Fn))
+ return false;
+
+ return llvm::None;
+ }
+
+ /// Set of functions that we know for sure are reachable.
+ DenseSet<const Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<const Function *> Unreachable;
+
+ /// If we can reach a function with a call to an unknown function we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+
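
The cache above is deliberately three-valued: reachable is a fact, unreachable is an optimistic assumption that may be revoked, and anything else has simply not been queried yet. A plain C++ model of the same idea (std::optional in place of llvm::Optional):

    #include <optional>
    #include <set>

    struct ReachCache {
      std::set<int> Reachable;   // Known facts.
      std::set<int> Unreachable; // Optimistic assumptions, may be revoked.
      bool CanReachUnknownCallee = false;

      std::optional<bool> isCachedReachable(int Fn) const {
        if (CanReachUnknownCallee)
          return true;           // Pessimistic catch-all.
        if (Reachable.count(Fn))
          return true;
        if (Unreachable.count(Fn))
          return false;
        return std::nullopt;     // Never queried before.
      }
    };
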
+ struct QueryResolver : public QuerySet {
ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
@@ -9499,31 +9643,30 @@ private:
}
}
- for (Function *Fn : make_early_inc_range(Unreachable)) {
- if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ for (const Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, *Fn)) {
Change = ChangeStatus::CHANGED;
- markReachable(Fn);
+ markReachable(*Fn);
}
}
return Change;
}
- bool isReachable(Attributor &A, const AAFunctionReachability &AA,
- ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
- // Assume that we can reach the function.
- // TODO: Be more specific with the unknown callee.
- if (CanReachUnknownCallee)
- return true;
-
- if (Reachable.count(Fn))
- return true;
+ bool isReachable(Attributor &A, AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ const Function &Fn) {
+ Optional<bool> Cached = isCachedReachable(Fn);
+ if (Cached.hasValue())
+ return Cached.getValue();
- if (Unreachable.count(Fn))
- return false;
+ // The query was not cached, thus it is new. We need to request an update
+ // explicitly to make sure the information is properly run to a
+ // fixpoint.
+ A.registerForUpdate(AA);
// We need to assume that this function can't reach Fn to prevent
// an infinite loop if this function is recursive.
- Unreachable.insert(Fn);
+ Unreachable.insert(&Fn);
bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
if (Result)
@@ -9533,13 +9676,13 @@ private:
bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList,
- Function *Fn) const {
+ const Function &Fn) const {
// Handle the most trivial case first.
for (auto *AAEdges : AAEdgesList) {
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- if (Edges.count(Fn))
+ if (Edges.count(const_cast<Function *>(&Fn)))
return true;
}
@@ -9560,28 +9703,44 @@ private:
}
// The result is false for now, set dependencies and leave.
- for (auto Dep : Deps)
- A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
+ for (auto *Dep : Deps)
+ A.recordDependence(*Dep, AA, DepClassTy::REQUIRED);
return false;
}
+ };
- /// Set of functions that we know for sure is reachable.
- DenseSet<Function *> Reachable;
+ /// Get call edges that can be reached by this instruction.
+ bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability,
+ const Instruction &Inst,
+ SmallVector<const AACallEdges *> &Result) const {
+ // Determine call-like instructions that we can reach from the inst.
+ auto CheckCallBase = [&](Instruction &CBInst) {
+ if (!Reachability.isAssumedReachable(A, Inst, CBInst))
+ return true;
- /// Set of functions that are unreachable, but might become reachable.
- DenseSet<Function *> Unreachable;
+ auto &CB = cast<CallBase>(CBInst);
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
- };
+ Result.push_back(&AAEdges);
+ return true;
+ };
+
+ bool UsedAssumedInformation = false;
+ return A.checkForAllCallLikeInstructions(CheckCallBase, *this,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
+ }
public:
AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
: AAFunctionReachability(IRP, A) {}
- bool canReach(Attributor &A, Function *Fn) const override {
+ bool canReach(Attributor &A, const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
@@ -9590,14 +9749,18 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- bool Result =
- NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis,
+ {&AAEdges}, Fn);
return Result;
}
/// Can \p CB reach \p Fn
- bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ bool canReach(Attributor &A, CallBase &CB,
+ const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9606,13 +9769,40 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
+ QueryResolver &CBQuery = NonConstThis->CBQueries[&CB];
- bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn);
return Result;
}
+ bool instructionCanReach(Attributor &A, const Instruction &Inst,
+ const Function &Fn,
+ bool UseBackwards) const override {
+ if (!isValidState())
+ return true;
+
+ if (UseBackwards)
+ return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
+
+ const auto &Reachability = A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
+ if (!AllKnown)
+ InstQSet.CanReachUnknownCallee = true;
+
+ return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const AACallEdges &AAEdges =
@@ -9621,7 +9811,7 @@ public:
Change |= WholeFunction.update(A, *this, {&AAEdges});
- for (auto CBPair : CBQueries) {
+ for (auto &CBPair : CBQueries) {
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(*CBPair.first),
DepClassTy::REQUIRED);
@@ -9629,6 +9819,25 @@ public:
Change |= CBPair.second.update(A, *this, {&AAEdges});
}
+ // Update the Instruction queries.
+ const AAReachability *Reachability;
+ if (!InstQueries.empty()) {
+ Reachability = &A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+ }
+
+ // Check for local callbases first.
+ for (auto &InstPair : InstQueries) {
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown =
+ getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+ // Update will return Changed if this affects any queries.
+ if (!AllKnown)
+ InstPair.second.CanReachUnknownCallee = true;
+ Change |= InstPair.second.update(A, *this, CallEdges);
+ }
+
return Change;
}
@@ -9649,11 +9858,14 @@ private:
}
/// Used to answer if the whole function can reach a specific function.
- QuerySet WholeFunction;
+ QueryResolver WholeFunction;
/// Used to answer if a call base inside this function can reach a specific
/// function.
- DenseMap<CallBase *, QuerySet> CBQueries;
+ DenseMap<const CallBase *, QueryResolver> CBQueries;
+
+ /// This is for instruction queries that scan "forward".
+ DenseMap<const Instruction *, QueryResolver> InstQueries;
};
/// ---------------------- Assumption Propagation ------------------------------
@@ -9790,8 +10002,6 @@ private:
}
};
-} // namespace
-
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
&A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 74f11fa30959..927dceec8865 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d3cac3efce86..1cb32e32c895 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,14 +352,10 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
User *V = U->getUser();
- if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) {
- AppendUses(V);
- continue;
- }
- if (auto *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllConstantIndices())
- return false;
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
+ (GEP && GEP->hasAllConstantIndices())) {
AppendUses(V);
continue;
}
@@ -2229,6 +2225,13 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
+ // Return whether GV is explicitly or implicitly dso_local and not replaceable
+ // by another definition in the current linkage unit.
+ auto IsModuleLocal = [](GlobalValue &GV) {
+ return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
+ (GV.isDSOLocal() || GV.isImplicitDSOLocal());
+ };
+
for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
@@ -2240,18 +2243,20 @@ OptimizeGlobalAliases(Module &M,
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J.isInterposable())
+ if (!IsModuleLocal(J))
continue;
Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
- // if the aliasee is interposable because aliases point to the local
- // definition.
+ // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
+ // alias can be used to access the definition as if preemption did not
+ // happen.
// TODO: Try to handle non-zero GEPs of local aliasees.
- if (!Target || Target->isInterposable())
+ if (!Target || !IsModuleLocal(*Target))
continue;
+
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index e064fbbef595..faf7cb7d566a 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches;
// A command flag to be used for debugging to exclude indirect calls from
// similarity matching and outlining.
extern cl::opt<bool> DisableIndirectCalls;
+
+// A command flag to be used for debugging to exclude intrinsics from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableIntrinsics;
+
} // namespace llvm
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
@@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
InstructionClassifier.EnableBranches = !DisableBranches;
InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
+ InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
+
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index c0bb19e184d6..8e83d7bcb6c2 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 68f33410c602..2d765fb6ce6d 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,19 +26,25 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptBarrierElimination(
+ "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that eliminate barriers."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> PrintModuleAfterOptimizations(
"openmp-opt-print-module", cl::ZeroOrMore,
cl::desc("Print the current module after OpenMP optimizations."),
@@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged,
"Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
"Amount of memory pushed to shared memory");
+STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
@@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache {
RTLFunctions.insert(F); \
if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
RuntimeFunctionIDMap[F] = _Enum; \
- F->removeFnAttr(Attribute::NoInline); \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
@@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache {
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
+ // functions, except if `optnone` is present.
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
+
// TODO: We should attach the attributes defined in OMPKinds.def.
}
@@ -787,6 +807,8 @@ struct OpenMPOpt {
if (remarksEnabled())
analysisGlobalization();
+
+ Changed |= eliminateBarriers();
} else {
if (PrintICVValues)
printICVs();
@@ -809,6 +831,8 @@ struct OpenMPOpt {
Changed = true;
}
}
+
+ Changed |= eliminateBarriers();
}
return Changed;
@@ -1378,6 +1402,213 @@ private:
return Changed;
}
+ /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
+ /// TODO: Make this an AA and expand it to work across blocks and functions.
+ bool eliminateBarriers() {
+ bool Changed = false;
+
+ if (DisableOpenMPOptBarrierElimination)
+ return /*Changed=*/false;
+
+ if (OMPInfoCache.Kernels.empty())
+ return /*Changed=*/false;
+
+ enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
+
+ class BarrierInfo {
+ Instruction *I;
+ enum ImplicitBarrierType Type;
+
+ public:
+ BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
+ BarrierInfo(Instruction &I) : I(&I) {}
+
+ bool isImplicit() { return !I; }
+
+ bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
+
+ bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
+
+ Instruction *getInstruction() { return I; }
+ };
+
+ for (Function *Kernel : OMPInfoCache.Kernels) {
+ for (BasicBlock &BB : *Kernel) {
+ SmallVector<BarrierInfo, 8> BarriersInBlock;
+ SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
+
+ // Add the kernel entry implicit barrier.
+ if (&Kernel->getEntryBlock() == &BB)
+ BarriersInBlock.push_back(IBT_ENTRY);
+
+ // Find implicit and explicit aligned barriers in the same basic block.
+ for (Instruction &I : BB) {
+ if (isa<ReturnInst>(I)) {
+ // Add the implicit barrier when exiting the kernel.
+ BarriersInBlock.push_back(IBT_EXIT);
+ continue;
+ }
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ auto IsAlignBarrierCB = [&](CallBase &CB) {
+ switch (CB.getIntrinsicID()) {
+ case Intrinsic::nvvm_barrier0:
+ case Intrinsic::nvvm_barrier0_and:
+ case Intrinsic::nvvm_barrier0_or:
+ case Intrinsic::nvvm_barrier0_popc:
+ case Intrinsic::amdgcn_s_barrier:
+ return true;
+ default:
+ break;
+ }
+ return hasAssumption(CB,
+ KnownAssumptionString("ompx_aligned_barrier"));
+ };
+
+ if (IsAlignBarrierCB(*CB)) {
+ // Add an explicit aligned barrier.
+ BarriersInBlock.push_back(I);
+ }
+ }
+
+ if (BarriersInBlock.size() <= 1)
+ continue;
+
+ // A barrier in a barrier pair is removable if all instructions
+ // between the barriers in the pair are side-effect free modulo the
+ // barrier operation.
+ auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
+ BarrierInfo *EndBI) {
+ assert(
+ !StartBI->isImplicitExit() &&
+ "Expected start barrier to be other than a kernel exit barrier");
+ assert(
+ !EndBI->isImplicitEntry() &&
+ "Expected end barrier to be other than a kernel entry barrier");
+ // If StartBI's instruction is null then this is the implicit kernel
+ // entry barrier, so iterate from the first instruction in the entry
+ // block.
+ Instruction *I = (StartBI->isImplicitEntry())
+ ? &Kernel->getEntryBlock().front()
+ : StartBI->getInstruction()->getNextNode();
+ assert(I && "Expected non-null start instruction");
+ Instruction *E = (EndBI->isImplicitExit())
+ ? I->getParent()->getTerminator()
+ : EndBI->getInstruction();
+ assert(E && "Expected non-null end instruction");
+
+ for (; I != E; I = I->getNextNode()) {
+ if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
+ continue;
+
+ auto IsPotentiallyAffectedByBarrier =
+ [](Optional<MemoryLocation> Loc) {
+ const Value *Obj = (Loc && Loc->Ptr)
+ ? getUnderlyingObject(Loc->Ptr)
+ : nullptr;
+ if (!Obj) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Access to unknown location requires barriers\n");
+ return true;
+ }
+ if (isa<UndefValue>(Obj))
+ return false;
+ if (isa<AllocaInst>(Obj))
+ return false;
+ if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
+ if (GV->isConstant())
+ return false;
+ if (GV->isThreadLocal())
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Local)
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Constant)
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Access to '" << *Obj
+ << "' requires barriers\n");
+ return true;
+ };
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ Optional<MemoryLocation> Loc =
+ MemoryLocation::getForSource(MTI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+ continue;
+
+ Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+
+ return true;
+ };
+
+ // Iterate barrier pairs and remove an explicit barrier if analysis
+ // deems it removable.
+ for (auto *It = BarriersInBlock.begin(),
+ *End = BarriersInBlock.end() - 1;
+ It != End; ++It) {
+
+ BarrierInfo *StartBI = It;
+ BarrierInfo *EndBI = (It + 1);
+
+ // Cannot remove when both are implicit barriers; continue.
+ if (StartBI->isImplicit() && EndBI->isImplicit())
+ continue;
+
+ if (!IsBarrierRemoveable(StartBI, EndBI))
+ continue;
+
+ assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
+ "Expected at least one explicit barrier to remove.");
+
+ // Remove an explicit barrier; check the first, then the second.
+ if (!StartBI->isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Remove start barrier "
+ << *StartBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(StartBI->getInstruction());
+ } else {
+ LLVM_DEBUG(dbgs() << "Remove end barrier "
+ << *EndBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(EndBI->getInstruction());
+ }
+ }
+
+ if (BarriersToBeDeleted.empty())
+ continue;
+
+ Changed = true;
+ for (Instruction *I : BarriersToBeDeleted) {
+ ++NumBarriersEliminated;
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Redundant barrier eliminated.";
+ };
+
+ if (EnableVerboseRemarks)
+ emitRemark<OptimizationRemark>(I, "OMP190", Remark);
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+ }
+
void analysisGlobalization() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
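The new eliminateBarriers() removes one barrier of an adjacent pair when nothing between them touches shared state. A source-level sketch of the shape it targets, assuming the device runtime lowers these pragmas to the aligned barrier calls the pass recognizes:

  #include <omp.h>

  void kernel(int *Out, int N) {
  #pragma omp target teams map(tofrom : Out[0 : N])
  #pragma omp parallel
    {
      int Priv = omp_get_thread_num(); // thread-private, no shared access
  #pragma omp barrier                  // first aligned barrier
      Priv += 1;                       // still only thread-private work, so ...
  #pragma omp barrier                  // ... one of these two barriers is redundant
      if (Priv < N)
        Out[Priv] = Priv;
    }
  }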
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 21395460bccb..e104ae00e916 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index daaf6cbeb3fd..52708ff2f226 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -535,7 +535,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// the information that is needed by thin link will be written in the
// given OS.
if (ThinLinkOS && Index)
- WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
+ writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6acace1d9fd4..8b30f0e989a1 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -970,7 +970,7 @@ bool DevirtModule::runForTesting(
if (StringRef(ClWriteSummary).endswith(".bc")) {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
ExitOnErr(errorCodeToError(EC));
- WriteIndexToFile(*Summary, OS);
+ writeIndexToFile(*Summary, OS);
} else {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
ExitOnErr(errorCodeToError(EC));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1fb46af46bee..05b28328afbf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
- // Remove identical consecutive fences.
- Instruction *Next = FI.getNextNonDebugInstruction();
- if (auto *NFI = dyn_cast<FenceInst>(Next))
- if (FI.isIdenticalTo(NFI))
+ auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
+ // This check is solely here to handle arbitrary target-dependent syncscopes.
+ // TODO: Can be removed if this does not matter in practice.
+ if (NFI && FI.isIdenticalTo(NFI))
+ return eraseInstFromFunction(FI);
+
+ // Returns true if FI1 is identical to, or a stronger fence than, FI2.
+ auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
+ auto FI1SyncScope = FI1->getSyncScopeID();
+ // Consider same scope, where scope is global or single-thread.
+ if (FI1SyncScope != FI2->getSyncScopeID() ||
+ (FI1SyncScope != SyncScope::System &&
+ FI1SyncScope != SyncScope::SingleThread))
+ return false;
+
+ return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
+ };
+ if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
+ return eraseInstFromFunction(FI);
+
+ if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
+ if (isIdenticalOrStrongerFence(PFI, &FI))
return eraseInstFromFunction(FI);
return nullptr;
}
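A minimal sketch of what the strengthened fence fold permits, using standard C++ fences (their default scope and orderings map onto the SyncScope and isAtLeastOrStrongerThan checks above):

  #include <atomic>

  void publish(std::atomic<int> &Flag) {
    std::atomic_thread_fence(std::memory_order_acquire); // weaker fence ...
    std::atomic_thread_fence(std::memory_order_seq_cst); // ... subsumed by this one
    Flag.store(1, std::memory_order_relaxed);
  }

Since the seq_cst fence is at least as strong as the adjacent acquire fence in the same scope, InstCombine may now erase the acquire fence.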
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fd58a44504b3..e45be5745fcc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5882,6 +5882,55 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
return nullptr;
}
+/// This function folds patterns produced by lowering of reduce idioms, such
+/// as llvm.vector.reduce.and, which are lowered into instruction chains. This
+/// code attempts to generate fewer scalar comparisons in place of vector
+/// comparisons when possible.
+static Instruction *foldReductionIdiom(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
+ if (I.getType()->isVectorTy())
+ return nullptr;
+ ICmpInst::Predicate OuterPred, InnerPred;
+ Value *LHS, *RHS;
+
+ // Match lowering of @llvm.vector.reduce.and. Turn
+ /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs
+ /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8
+ /// %res = icmp <pred> i8 %scalar_ne, 0
+ ///
+ /// into
+ ///
+ /// %lhs.scalar = bitcast <8 x i8> %lhs to i64
+ /// %rhs.scalar = bitcast <8 x i8> %rhs to i64
+ /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar
+ ///
+ /// for <pred> in {ne, eq}.
+ if (!match(&I, m_ICmp(OuterPred,
+ m_OneUse(m_BitCast(m_OneUse(
+ m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
+ m_Zero())))
+ return nullptr;
+ auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
+ if (!LHSTy || !LHSTy->getElementType()->isIntegerTy())
+ return nullptr;
+ unsigned NumBits =
+ LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
+ // TODO: Relax this to "not wider than max legal integer type"?
+ if (!DL.isLegalInteger(NumBits))
+ return nullptr;
+
+ if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) {
+ auto *ScalarTy = Builder.getIntNTy(NumBits);
+ LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
+ RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
+ return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS,
+ I.getName());
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6124,6 +6173,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInvariantGroup(I))
return Res;
+ if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
+ return Res;
+
return Changed ? &I : nullptr;
}
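A source-level sketch of the reduction idiom foldReductionIdiom targets; whether the vectorizer emits exactly the icmp + bitcast + icmp chain shown in the comment above depends on target and flags:

  #include <cstdint>

  bool allEqual8(const uint8_t *A, const uint8_t *B) {
    bool Eq = true;
    for (int I = 0; I < 8; ++I) // may vectorize into icmp ne <8 x i8> + reduce
      Eq &= (A[I] == B[I]);
    return Eq;                  // the fold can reduce this to one i64 compare
  }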
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 30f6aab2114b..09694d50468f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -46,8 +46,8 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
// will be inefficient.
assert(!isa<CallInst>(Inst));
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = cast<Instruction>(V);
Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc());
}
}
@@ -138,8 +138,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
return nullptr;
SmallVector<Value *, 4> AvailablePtrVals;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- Value *Arg = PN.getIncomingValue(i);
+ for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *Arg = std::get<1>(Incoming);
// First look backward:
if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) {
@@ -151,8 +152,8 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
Value *ArgIntToPtr = nullptr;
for (User *U : Arg->users()) {
if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() &&
- (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) ||
- cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) {
+ (DT.dominates(cast<Instruction>(U), BB) ||
+ cast<Instruction>(U)->getParent() == BB)) {
ArgIntToPtr = U;
break;
}
@@ -190,26 +191,21 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
"Not enough available ptr typed incoming values");
PHINode *MatchingPtrPHI = nullptr;
unsigned NumPhis = 0;
- for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) {
+ for (PHINode &PtrPHI : BB->phis()) {
// FIXME: consider handling this in AggressiveInstCombine
- PHINode *PtrPHI = dyn_cast<PHINode>(II);
- if (!PtrPHI)
- break;
- if (NumPhis > MaxNumPhis)
+ if (NumPhis++ > MaxNumPhis)
return nullptr;
- if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
+ if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType())
continue;
- MatchingPtrPHI = PtrPHI;
- for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) {
- if (AvailablePtrVals[i] !=
- PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) {
- MatchingPtrPHI = nullptr;
- break;
- }
- }
-
- if (MatchingPtrPHI)
- break;
+ if (any_of(zip(PN.blocks(), AvailablePtrVals),
+ [&](const auto &BlockAndValue) {
+ BasicBlock *BB = std::get<0>(BlockAndValue);
+ Value *V = std::get<1>(BlockAndValue);
+ return PtrPHI.getIncomingValueForBlock(BB) != V;
+ }))
+ continue;
+ MatchingPtrPHI = &PtrPHI;
+ break;
}
if (MatchingPtrPHI) {
@@ -250,9 +246,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
InsertNewInstBefore(NewPtrPHI, PN);
SmallDenseMap<Value *, Instruction *> Casts;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- auto *IncomingBB = PN.getIncomingBlock(i);
- auto *IncomingVal = AvailablePtrVals[i];
+ for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) {
+ auto *IncomingBB = std::get<0>(Incoming);
+ auto *IncomingVal = std::get<1>(Incoming);
if (IncomingVal->getType() == IntToPtr->getType()) {
NewPtrPHI->addIncoming(IncomingVal, IncomingBB);
@@ -330,8 +326,8 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `insertvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<InsertValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices())
return nullptr;
}
@@ -370,8 +366,8 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `extractvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<ExtractValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() ||
I->getAggregateOperand()->getType() !=
FirstEVI->getAggregateOperand()->getType())
@@ -412,8 +408,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
Type *RHSType = RHSVal->getType();
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || I->getOpcode() != Opc || !I->hasOneUser() ||
// Verify type of the LHS matches so we don't fold cmp's of different
// types.
@@ -461,15 +457,17 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ Instruction *InInst = cast<Instruction>(InVal);
if (NewLHS) {
Value *NewInLHS = InInst->getOperand(0);
- NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ NewLHS->addIncoming(NewInLHS, InBB);
}
if (NewRHS) {
Value *NewInRHS = InInst->getOperand(1);
- NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ NewRHS->addIncoming(NewInRHS, InBB);
}
}
}
@@ -487,8 +485,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
NewBinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- NewBinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ NewBinOp->andIRFlags(V);
PHIArgMergedDebugLoc(NewBinOp, PN);
return NewBinOp;
@@ -511,9 +509,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
bool AllInBounds = true;
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
return nullptr;
@@ -527,8 +524,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
AllBasePointersAreAllocas = false;
// Compare the operand lists.
- for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
- if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) {
+ if (FirstInst->getOperand(Op) == GEP->getOperand(Op))
continue;
// Don't merge two GEPs when two operands differ (introducing phi nodes)
@@ -536,11 +533,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// substantially cheaper to compute for the constants, so making it a
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
- isa<ConstantInt>(GEP->getOperand(op)))
+ if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
+ isa<ConstantInt>(GEP->getOperand(Op)))
return nullptr;
- if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ if (FirstInst->getOperand(Op)->getType() !=
+ GEP->getOperand(Op)->getType())
return nullptr;
// If we already needed a PHI for an earlier operand, and another operand
@@ -550,7 +548,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
if (NeededPhi)
return nullptr;
- FixedOperands[op] = nullptr; // Needs a PHI.
+ FixedOperands[Op] = nullptr; // Needs a PHI.
NeededPhi = true;
}
}
@@ -569,29 +567,30 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
bool HasAnyPHIs = false;
- for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
- if (FixedOperands[i]) continue; // operand doesn't need a phi.
- Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
- FirstOp->getName()+".pn");
+ for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) {
+ if (FixedOperands[I])
+ continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(I);
+ PHINode *NewPN =
+ PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn");
InsertNewInstBefore(NewPN, PN);
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
- OperandPhis[i] = NewPN;
- FixedOperands[i] = NewPN;
+ OperandPhis[I] = NewPN;
+ FixedOperands[I] = NewPN;
HasAnyPHIs = true;
}
-
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
- BasicBlock *InBB = PN.getIncomingBlock(i);
-
- for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
- if (PHINode *OpPhi = OperandPhis[op])
- OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal);
+
+ for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op)
+ if (PHINode *OpPhi = OperandPhis[Op])
+ OpPhi->addIncoming(InGEP->getOperand(Op), InBB);
}
}
@@ -627,18 +626,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
- bool isAddressTaken = false;
+ bool IsAddressTaken = false;
for (User *U : AI->users()) {
if (isa<LoadInst>(U)) continue;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If storing TO the alloca, then the address isn't taken.
if (SI->getOperand(1) == AI) continue;
}
- isAddressTaken = true;
+ IsAddressTaken = true;
break;
}
- if (!isAddressTaken && AI->isStaticAlloca())
+ if (!IsAddressTaken && AI->isStaticAlloca())
return false;
}
@@ -665,9 +664,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is.
- bool isVolatile = FirstLI->isVolatile();
+ bool IsVolatile = FirstLI->isVolatile();
Align LoadAlignment = FirstLI->getAlign();
- unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+ const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
@@ -678,22 +677,25 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (IsVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
- // Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
- if (!LI || !LI->hasOneUser())
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ LoadInst *LI = dyn_cast<LoadInst>(InVal);
+ if (!LI || !LI->hasOneUser() || LI->isAtomic())
+ return nullptr;
+
+ // Make sure all arguments are the same type of operation.
+ if (LI->isVolatile() != IsVolatile ||
+ LI->getPointerAddressSpace() != LoadAddrSpace)
return nullptr;
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
- if (LI->isVolatile() != isVolatile ||
- LI->getParent() != PN.getIncomingBlock(i) ||
- LI->getPointerAddressSpace() != LoadAddrSpace ||
- !isSafeAndProfitableToSinkLoad(LI))
+ if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI))
return nullptr;
LoadAlignment = std::min(LoadAlignment, LI->getAlign());
@@ -701,8 +703,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
}
@@ -715,7 +716,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
LoadInst *NewLI =
- new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
+ new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
@@ -734,13 +735,15 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
// Add all operands to the new PHI and combine TBAA metadata.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ LoadInst *LI = cast<LoadInst>(V);
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
if (InVal) {
@@ -755,7 +758,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
- if (isVolatile)
+ if (IsVolatile)
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
@@ -830,8 +833,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
// operands, and zext the result back to the original type.
PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
Phi.getName() + ".shrunk");
- for (unsigned i = 0; i != NumIncomingValues; ++i)
- NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+ for (unsigned I = 0; I != NumIncomingValues; ++I)
+ NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I));
InsertNewInstBefore(NewPhi, Phi);
return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
@@ -885,13 +888,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
// Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst))
return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
- return nullptr; // Cast operation must match.
+ return nullptr; // Cast operation must match.
} else if (I->getOperand(1) != ConstantOp) {
return nullptr;
}
@@ -907,11 +910,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
// Add all operands to the new PHI.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ Value *NewInVal = cast<Instruction>(V)->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
Value *PhiVal;
@@ -937,8 +942,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
BinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- BinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ BinOp->andIRFlags(V);
PHIArgMergedDebugLoc(BinOp, PN);
return BinOp;
@@ -952,8 +957,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
/// Return true if this PHI node is only used by a PHI node cycle that is dead.
-static bool DeadPHICycle(PHINode *PN,
- SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
+static bool isDeadPHICycle(PHINode *PN,
+ SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {
if (PN->use_empty()) return true;
if (!PN->hasOneUse()) return false;
@@ -966,7 +971,7 @@ static bool DeadPHICycle(PHINode *PN,
return false;
if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
- return DeadPHICycle(PU, PotentiallyDeadPHIs);
+ return isDeadPHICycle(PU, PotentiallyDeadPHIs);
return false;
}
@@ -999,7 +1004,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
/// Return an existing non-zero constant if this phi node has one, otherwise
/// return constant 1.
-static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
+static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) {
assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
for (Value *V : PN.operands())
if (auto *ConstVA = dyn_cast<ConstantInt>(V))
@@ -1014,8 +1019,8 @@ struct PHIUsageRecord {
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
- : PHIId(pn), Shift(Sh), Inst(User) {}
+ PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User)
+ : PHIId(Pn), Shift(Sh), Inst(User) {}
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
@@ -1032,12 +1037,11 @@ struct LoweredPHIRecord {
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
- LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
- : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty)
+ : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
// Ctor form used by DenseMap.
- LoweredPHIRecord(PHINode *pn, unsigned Sh)
- : PN(pn), Shift(Sh), Width(0) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {}
};
} // namespace
@@ -1093,10 +1097,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// input is defined in the predecessor, then we won't be able to split the
// critical edge, which is required to insert a truncate. Because of this, we
// have to bail out.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (!II) continue;
- if (II->getParent() != PN->getIncomingBlock(i))
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ InvokeInst *II = dyn_cast<InvokeInst>(V);
+ if (!II)
+ continue;
+ if (II->getParent() != BB)
continue;
// If we have a phi, and if it's directly in the predecessor, then we have
@@ -1146,8 +1153,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) dbgs()
- << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';);
+ for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs()
+ << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n');
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -1175,8 +1182,9 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *Pred = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
Value *&PredVal = PredValues[Pred];
// If we already have a value for this predecessor, reuse it.
@@ -1186,7 +1194,6 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i);
if (InVal == PN) {
PredVal = EltPHI;
EltPHI->addIncoming(PredVal, Pred);
@@ -1207,8 +1214,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Builder.SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
- Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
- Offset), "extract");
+ Res = Builder.CreateLShr(
+ Res, ConstantInt::get(InVal->getType(), Offset), "extract");
Res = Builder.CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
@@ -1217,12 +1224,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
// needed piece.
- if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHINode *OldInVal = dyn_cast<PHINode>(InVal))
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId =
find(PHIsToSlice, OldInVal) - PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
- cast<Instruction>(Res)));
+ PHIUsers.push_back(
+ PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res)));
++UserE;
}
}
@@ -1240,12 +1247,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with poison.
Value *Poison = PoisonValue::get(FirstPhi.getType());
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- replaceInstUsesWith(*PHIsToSlice[i], Poison);
+ for (PHINode *PHI : drop_begin(PHIsToSlice))
+ replaceInstUsesWith(*PHI, Poison);
return replaceInstUsesWith(FirstPhi, Poison);
}
-static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
+static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
const DominatorTree &DT) {
// Simplify the following patterns:
// if (cond)
@@ -1302,8 +1309,8 @@ static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
DT.dominates(FalseOutEdge, FalseIncEdge))
// This Phi is actually equivalent to branching condition of IDom.
return Cond;
- else if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
- DT.dominates(FalseOutEdge, TrueIncEdge)) {
+ if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
+ DT.dominates(FalseOutEdge, TrueIncEdge)) {
// This Phi is actually opposite to branching condition of IDom. We invert
// the condition that will potentially open up some opportunities for
// sinking.
@@ -1369,7 +1376,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
PotentiallyDeadPHIs.insert(&PN);
- if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ if (isDeadPHICycle(PU, PotentiallyDeadPHIs))
return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
}
@@ -1398,15 +1405,15 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
match(CmpInst->getOperand(1), m_Zero())) {
ConstantInt *NonZeroConst = nullptr;
bool MadeChange = false;
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator();
- Value *VA = PN.getIncomingValue(i);
+ for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+ Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator();
+ Value *VA = PN.getIncomingValue(I);
if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) {
if (!NonZeroConst)
- NonZeroConst = GetAnyNonZeroConstInt(PN);
+ NonZeroConst = getAnyNonZeroConstInt(PN);
if (NonZeroConst != VA) {
- replaceOperand(PN, i, NonZeroConst);
+ replaceOperand(PN, I, NonZeroConst);
MadeChange = true;
}
}
@@ -1457,17 +1464,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// however.
PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
if (&PN != FirstPN)
- for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BBA = PN.getIncomingBlock(i);
- BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *BBA = PN.getIncomingBlock(I);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(I);
if (BBA != BBB) {
- Value *VA = PN.getIncomingValue(i);
- unsigned j = PN.getBasicBlockIndex(BBB);
- Value *VB = PN.getIncomingValue(j);
- PN.setIncomingBlock(i, BBB);
- PN.setIncomingValue(i, VB);
- PN.setIncomingBlock(j, BBA);
- PN.setIncomingValue(j, VA);
+ Value *VA = PN.getIncomingValue(I);
+ unsigned J = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(J);
+ PN.setIncomingBlock(I, BBB);
+ PN.setIncomingValue(I, VB);
+ PN.setIncomingBlock(J, BBA);
+ PN.setIncomingValue(J, VA);
// NOTE: Instcombine normally would want us to "return &PN" if we
// modified any of the operands of an instruction. However, since we
// aren't adding or removing uses (just rearranging them) we don't do
@@ -1500,7 +1507,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
return Res;
// Ultimately, try to replace this Phi with a dominating condition.
- if (auto *V = SimplifyUsingControlFlow(*this, PN, DT))
+ if (auto *V = simplifyUsingControlFlow(*this, PN, DT))
return replaceInstUsesWith(PN, V);
return nullptr;
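A sketch of the control-flow pattern simplifyUsingControlFlow recognizes: a phi whose incoming values merely restate the dominating branch condition:

  bool selectFlag(bool Cond) {
    bool R;
    if (Cond)
      R = true;  // incoming value on the true edge
    else
      R = false; // incoming value on the false edge
    return R;    // the phi for R is equivalent to Cond itself
  }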
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 71a5ae24eead..3f064cfda712 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1219,7 +1219,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
I != E; I++)
if (I.isStruct())
- return true;;
+ return true;
return false;
};
if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
@@ -1228,10 +1228,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Conservatively track the demanded elements back through any vector
// operands we may have. We know there must be at least one, or we
// wouldn't have a vector result to get here. Note that we intentionally
- // merge the undef bits here since gepping with either an undef base or
- // index results in undef.
+ // merge the undef bits here since gepping with either a poison base or
+ // index results in poison.
for (unsigned i = 0; i < I->getNumOperands(); i++) {
- if (match(I->getOperand(i), m_Undef())) {
+ if (i == 0 ? match(I->getOperand(i), m_Undef())
+ : match(I->getOperand(i), m_Poison())) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
return nullptr;
@@ -1239,7 +1240,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (I->getOperand(i)->getType()->isVectorTy()) {
APInt UndefEltsOp(VWidth, 0);
simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
- UndefElts |= UndefEltsOp;
+ // gep(x, undef) is not undef, so skip considering index operands here.
+ // Note that we could propagate poison, but we can't distinguish between
+ // undef and poison bits at the moment.
+ if (i == 0)
+ UndefElts |= UndefEltsOp;
}
}
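A minimal standalone sketch (hypothetical helper, not the LLVM API) of the operand split introduced above: only the base operand of a GEP merges its undef elements into the result, since gep(x, undef) is not undef:

  #include <cstdint>
  #include <vector>

  // One undef-element bitmask per GEP operand; returns the merged result mask.
  uint64_t gepUndefElts(const std::vector<uint64_t> &OpUndef) {
    uint64_t Undef = 0;
    for (size_t I = 0; I < OpUndef.size(); ++I)
      if (I == 0) // index operands (I > 0) no longer contribute
        Undef |= OpUndef[I];
    return Undef;
  }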
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 029be5257694..3091905ca534 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6e72255e51ae..8f94172a6402 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1527,22 +1527,22 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(LI, SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
@@ -1556,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
auto BasePtr = CI->getOperand(OpOffset);
- if (ignoreAccess(LI, BasePtr))
+ if (ignoreAccess(I, BasePtr))
return;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
@@ -1568,7 +1568,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
} else {
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(LI, CI->getArgOperand(ArgNo)))
+ ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
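The LI-to-I changes above fix a scoping pitfall: a variable declared in an if-condition remains in scope, but null, in the later branches. A self-contained sketch of the bug class, using hypothetical mini-IR types:

  #include <cstdio>

  struct Inst { int Kind; };
  Inst *asLoad(Inst *I) { return I->Kind == 0 ? I : nullptr; }
  void check(Inst *Ctx) { std::printf("ctx: %p\n", (void *)Ctx); }

  void visit(Inst *I) {
    if (Inst *LI = asLoad(I)) {
      check(LI); // fine: LI == I in this branch
    } else {
      check(LI); // compiles, but LI is always null here; pass I instead
    }
  }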
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index fb10a99d1338..7b3741d19a1b 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -304,6 +304,7 @@ public:
static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
const DominatorTree &DT);
bool instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1359,6 +1360,7 @@ bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
}
bool HWAddressSanitizer::instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1410,7 +1412,7 @@ bool HWAddressSanitizer::instrumentStack(
};
bool StandardLifetime =
UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
- if (DetectUseAfterScope && StandardLifetime) {
+ if (ShouldDetectUseAfterScope && StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IRB.SetInsertPoint(Start->getNextNode());
tagAlloca(IRB, AI, Tag, Size);
@@ -1505,8 +1507,14 @@ bool HWAddressSanitizer::sanitizeFunction(
SmallVector<Instruction *, 8> LandingPadVec;
SmallVector<Instruction *, 4> UnrecognizedLifetimes;
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
+ bool CallsReturnTwice = false;
for (auto &BB : F) {
for (auto &Inst : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+ if (CI->canReturnTwice()) {
+ CallsReturnTwice = true;
+ }
+ }
if (InstrumentStack) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isInterestingAlloca(*AI))
@@ -1531,9 +1539,14 @@ bool HWAddressSanitizer::sanitizeFunction(
}
}
- if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
- isa<CleanupReturnInst>(Inst))
+ if (isa<ReturnInst>(Inst)) {
+ if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
+ RetVec.push_back(CI);
+ else
+ RetVec.push_back(&Inst);
+ } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
RetVec.push_back(&Inst);
+ }
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
for (Value *V : DVI->location_ops()) {
@@ -1585,7 +1598,12 @@ bool HWAddressSanitizer::sanitizeFunction(
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
+ // Calls to functions that may return twice (e.g. setjmp) confuse the
+ // postdominator analysis and would lead us to keep memory tagged after
+ // function return. Work around this by always untagging at every return
+ // statement if returns_twice functions are called.
+ instrumentStack(DetectUseAfterScope && !CallsReturnTwice,
+ AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
RetVec, StackTag, GetDT, GetPDT);
}
// Pad and align each of the allocas that we instrumented to stop small
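A sketch of why returns_twice calls force untagging on every return: a longjmp back through setjmp bypasses the paths the post-dominator analysis reasons about, so stack tags could otherwise outlive the frame:

  #include <csetjmp>

  std::jmp_buf Env;
  int compute() { std::longjmp(Env, 1); } // jumps back to setjmp in the sketch

  int frame() {
    int Local = 0;        // stack slot HWASan tags on entry
    if (setjmp(Env) == 0)
      Local = compute();
    return Local;         // untag unconditionally once setjmp is involved
  }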
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index ab179b03dd29..6868408ef5f5 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
lowerIncrement(IPI);
MadeChange = true;
+ } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
+ lowerCover(IPC);
+ MadeChange = true;
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(IPVP);
MadeChange = true;
@@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) {
return !F->use_empty();
return false;
};
- return containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
+ return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
+ containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
@@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Ind->eraseFromParent();
}
-void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
- GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
-
- IRBuilder<> Builder(Inc);
- uint64_t Index = Inc->getIndex()->getZExtValue();
- Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(),
- Counters, 0, Index);
-
- if (isRuntimeCounterRelocationEnabled()) {
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
- Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
- Function *Fn = Inc->getParent()->getParent();
- Instruction &I = Fn->getEntryBlock().front();
- LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (!LI) {
- IRBuilder<> Builder(&I);
- GlobalVariable *Bias =
- M->getGlobalVariable(getInstrProfCounterBiasVarName());
- if (!Bias) {
- // Compiler must define this variable when runtime counter relocation
- // is being used. Runtime has a weak external reference that is used
- // to check whether that's the case or not.
- Bias = new GlobalVariable(
- *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
- Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
- Bias->setVisibility(GlobalVariable::HiddenVisibility);
- // A definition that's weak (linkonce_odr) without being in a COMDAT
- // section wouldn't lead to link errors, but it would lead to a dead
- // data word from every TU but one. Putting it in COMDAT ensures there
- // will be exactly one data slot in the link.
- if (TT.supportsCOMDAT())
- Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
- }
- LI = Builder.CreateLoad(Int64Ty, Bias);
+Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
+ auto *Counters = getOrCreateRegionCounters(I);
+ IRBuilder<> Builder(I);
+
+ auto *Addr = Builder.CreateConstInBoundsGEP2_32(
+ Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
+
+ if (!isRuntimeCounterRelocationEnabled())
+ return Addr;
+
+ Type *Int64Ty = Type::getInt64Ty(M->getContext());
+ Function *Fn = I->getParent()->getParent();
+ Instruction &EntryI = Fn->getEntryBlock().front();
+ LoadInst *LI = dyn_cast<LoadInst>(&EntryI);
+ if (!LI) {
+ IRBuilder<> EntryBuilder(&EntryI);
+ auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ if (!Bias) {
+ // Compiler must define this variable when runtime counter relocation
+ // is being used. Runtime has a weak external reference that is used
+ // to check whether that's the case or not.
+ Bias = new GlobalVariable(
+ *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
+ Bias->setVisibility(GlobalVariable::HiddenVisibility);
+ // A definition that's weak (linkonce_odr) without being in a COMDAT
+ // section wouldn't lead to link errors, but it would lead to a dead
+ // data word from every TU but one. Putting it in COMDAT ensures there
+ // will be exactly one data slot in the link.
+ if (TT.supportsCOMDAT())
+ Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
- auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
- Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
+ LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
+ auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
+ return Builder.CreateIntToPtr(Add, Addr->getType());
+}
+
+void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
+ auto *Addr = getCounterAddress(CoverInstruction);
+ IRBuilder<> Builder(CoverInstruction);
+ // We store zero to represent that this block is covered.
+ Builder.CreateStore(Builder.getInt8(0), Addr);
+ CoverInstruction->eraseFromParent();
+}
+
+void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+ auto *Addr = getCounterAddress(Inc);
+ IRBuilder<> Builder(Inc);
if (Options.Atomic || AtomicCounterUpdateAll ||
- (Index == 0 && AtomicFirstCounter)) {
+ (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
MaybeAlign(), AtomicOrdering::Monotonic);
} else {
@@ -849,6 +864,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
}
GlobalVariable *
+InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage) {
+ uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+ auto &Ctx = M->getContext();
+ GlobalVariable *GV;
+ if (isa<InstrProfCoverInst>(Inc)) {
+ auto *CounterTy = Type::getInt8Ty(Ctx);
+ auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
+ // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
+ std::vector<Constant *> InitialValues(NumCounters,
+ Constant::getAllOnesValue(CounterTy));
+ GV = new GlobalVariable(*M, CounterArrTy, false, Linkage,
+ ConstantArray::get(CounterArrTy, InitialValues),
+ Name);
+ GV->setAlignment(Align(1));
+ } else {
+ auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ GV = new GlobalVariable(*M, CounterTy, false, Linkage,
+ Constant::getNullValue(CounterTy), Name);
+ GV->setAlignment(Align(8));
+ }
+ return GV;
+}
+
+GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
GlobalVariable *NamePtr = Inc->getName();
auto &PD = ProfileDataMap[NamePtr];
@@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
- ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
- // Create the counters variable.
- auto *CounterPtr =
- new GlobalVariable(*M, CounterTy, false, Linkage,
- Constant::getNullValue(CounterTy), CntsVarName);
+ auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
- CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;
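A conceptual model (not the real profiling runtime) of the new single-byte coverage counters created above: each counter byte starts as all-ones, and the lowered llvm.instrprof.cover stores zero when its block executes:

  #include <cstdint>
  #include <cstdio>

  uint8_t Counter[1] = {0xFF};                 // mirrors the getAllOnesValue init
  void instrumentedEntry() { Counter[0] = 0; } // mirrors the lowered store

  int main() {
    instrumentedEntry();
    std::printf("covered: %s\n", Counter[0] == 0 ? "yes" : "no");
    return 0;
  }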
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 8fedefccf0e1..5e078f2c4212 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index cfe993dedbc2..c51acdf52f14 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -182,6 +182,7 @@
#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1718,11 +1719,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Figure out maximal valid memcpy alignment.
const Align ArgAlign = DL.getValueOrABITypeAlignment(
MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
- Value *CpShadowPtr =
+ Value *CpShadowPtr, *CpOriginPtr;
+ std::tie(CpShadowPtr, CpOriginPtr) =
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
- /*isStore*/ true)
- .first;
- // TODO(glider): need to copy origins.
+ /*isStore*/ true);
if (!PropagateShadow || Overflow) {
// ParamTLS overflow.
EntryIRB.CreateMemSet(
@@ -1735,6 +1735,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CopyAlign, Size);
LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
(void)Cpy;
+
+ if (MS.TrackOrigins) {
+ Value *OriginPtr =
+ getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ EntryIRB.CreateMemCpy(
+ CpOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ OriginSize);
+ }
}
}
@@ -3701,7 +3714,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertShadowCheck(A, &CB);
Size = DL.getTypeAllocSize(A->getType());
} else {
- bool ArgIsInitialized = false;
Value *Store = nullptr;
// Compute the Shadow for arg even if it is ByVal, because
// in that case getShadow() will copy the actual arg shadow to
@@ -3722,10 +3734,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MaybeAlign Alignment = llvm::None;
if (ParamAlignment)
Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr =
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ false)
- .first;
+ /*isStore*/ false);
if (!PropagateShadow) {
Store = IRB.CreateMemSet(ArgShadowBase,
Constant::getNullValue(IRB.getInt8Ty()),
@@ -3733,6 +3745,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
+ if (MS.TrackOrigins) {
+ Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ IRB.CreateMemCpy(
+ ArgOriginBase,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ AOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
+ }
}
} else {
// Any other parameters mean we need bit-grained tracking of uninit
@@ -3743,12 +3766,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
Constant *Cst = dyn_cast<Constant>(ArgShadow);
- if (Cst && Cst->isNullValue())
- ArgIsInitialized = true;
+ if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ }
}
- if (MS.TrackOrigins && !ArgIsInitialized)
- IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
assert(Store != nullptr);
LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
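A small sketch of the OriginSize computation used in both hunks above; the 4-byte granularity stands in for MSan's kMinOriginAlignment (value assumed here for illustration):

  #include <cstdint>

  constexpr uint64_t kMinOriginAlignment = 4;

  uint64_t originCopySize(uint64_t Size) {
    // Round up to origin granularity; per the FIXME, the argument's start
    // offset within a 4-byte cell is not yet folded in.
    return (Size + kMinOriginAlignment - 1) & ~(kMinOriginAlignment - 1);
  }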
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c46415e5b1f4..0902a94452e3 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry(
"pgo-instrument-entry", cl::init(false), cl::Hidden,
cl::desc("Force to instrument function entry basicblock."));
+static cl::opt<bool> PGOFunctionEntryCoverage(
+ "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc(
+ "Use this option to enable function entry coverage instrumentation."));
+
static cl::opt<bool>
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
cl::desc("Fix function entry count in profile use."));
@@ -337,6 +342,33 @@ static const char *ValueProfKindDescr[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+ const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
+ Type *IntTy64 = Type::getInt64Ty(M.getContext());
+ uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
+ if (IsCS)
+ ProfileVersion |= VARIANT_MASK_CSIR_PROF;
+ if (PGOInstrumentEntry)
+ ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+ if (DebugInfoCorrelate)
+ ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
+ if (PGOFunctionEntryCoverage)
+ ProfileVersion |=
+ VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+ auto IRLevelVersionVariable = new GlobalVariable(
+ M, IntTy64, true, GlobalValue::WeakAnyLinkage,
+ Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
+ IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
+ IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
+ }
+ return IRLevelVersionVariable;
+}
+
namespace {
/// The select instruction visitor plays three roles specified
@@ -469,9 +501,7 @@ private:
createProfileFileNameVar(M, InstrProfileOutput);
// The variable in a comdat may be discarded by LTO. Ensure the
// declaration will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return false;
}
std::string InstrProfileOutput;
@@ -914,22 +944,39 @@ static void instrumentOneFunc(
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
+ auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
+ FuncInfo.FunctionHash);
+ if (PGOFunctionEntryCoverage) {
+ assert(!IsCS &&
+           "entry coverage does not support context-sensitive instrumentation");
+ auto &EntryBB = F.getEntryBlock();
+ IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+ // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
+ {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+ return;
+ }
+
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
- Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
+ // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
- {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
- Builder.getInt32(I++)});
+ {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}
// Now instrument select instructions:
@@ -1502,6 +1549,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
}
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+ if (PGOFunctionEntryCoverage)
+ return;
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
@@ -1622,8 +1671,7 @@ static bool InstrumentAllFunctions(
 // For the context-sensitive instrumentation, we should have a separate pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry,
- DebugInfoCorrelate);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1645,9 +1693,7 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return PreservedAnalyses::all();
}
@@ -1844,6 +1890,18 @@ static bool annotateAllFunctions(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
}
+ if (PGOReader->hasSingleByteCoverage()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Cannot use coverage profiles for optimization"));
+ return false;
+ }
+ if (PGOReader->functionEntryOnly()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Function entry profiles are not yet supported for optimization"));
+ return false;
+ }
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the
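The version word written by createIRLevelProfileFlagVar is a plain bitwise composition. A rough standalone model follows; the base version and the mask bit positions are illustrative assumptions, the authoritative definitions live in llvm/ProfileData/InstrProfData.inc:

#include <cstdint>
#include <cstdio>

constexpr uint64_t RawVersion            = 8;          // assumed base version
constexpr uint64_t MaskIRProf            = 1ULL << 56; // assumed bit layout
constexpr uint64_t MaskCSIRProf          = 1ULL << 57;
constexpr uint64_t MaskInstrEntry        = 1ULL << 58;
constexpr uint64_t MaskByteCoverage      = 1ULL << 60;
constexpr uint64_t MaskFunctionEntryOnly = 1ULL << 61;

uint64_t profileVersion(bool IsCS, bool InstrumentEntry, bool EntryCoverage) {
  uint64_t V = RawVersion | MaskIRProf;
  if (IsCS)
    V |= MaskCSIRProf;
  if (InstrumentEntry)
    V |= MaskInstrEntry;
  // Entry coverage implies both single-byte counters and entry-only counts,
  // matching the two masks OR'd together in the patch.
  if (EntryCoverage)
    V |= MaskByteCoverage | MaskFunctionEntryOnly;
  return V;
}

int main() {
  printf("%#llx\n", (unsigned long long)profileVersion(false, false, true));
}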
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 1ca6ddabac5b..126845bb3308 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -123,20 +123,9 @@ BundledRetainClaimRVs::~BundledRetainClaimRVs() {
// can't be tail calls.
if (auto *CI = dyn_cast<CallInst>(CB))
CI->setTailCallKind(CallInst::TCK_NoTail);
-
- if (UseMarker) {
- // Remove the retainRV/claimRV function operand from the operand bundle
- // to reflect the fact that the backend is responsible for emitting only
- // the marker instruction, but not the retainRV/claimRV call.
- OperandBundleDef OB("clang.arc.attachedcall", None);
- auto *NewCB = CallBase::Create(CB, OB, CB);
- CB->replaceAllUsesWith(NewCB);
- CB->eraseFromParent();
- }
}
- if (!ContractPass || !UseMarker)
- EraseInstruction(P.first);
+ EraseInstruction(P.first);
}
RVCalls.clear();
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 2b47bec7ffe8..62f88a8cc02b 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -105,8 +105,7 @@ CallInst *createCallInstWithColors(
class BundledRetainClaimRVs {
public:
- BundledRetainClaimRVs(bool ContractPass, bool UseMarker)
- : ContractPass(ContractPass), UseMarker(UseMarker) {}
+ BundledRetainClaimRVs(bool ContractPass) : ContractPass(ContractPass) {}
~BundledRetainClaimRVs();
/// Insert a retainRV/claimRV call to the normal destination blocks of invokes
@@ -156,9 +155,6 @@ private:
DenseMap<CallInst *, CallBase *> RVCalls;
bool ContractPass;
-
- /// Indicates whether the target uses a special inline-asm marker.
- bool UseMarker;
};
} // end namespace objcarc
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 9e2832827686..2985ae004d3c 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -434,23 +434,20 @@ bool ObjCARCContract::tryToPeepholeInstruction(
LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::UnsafeClaimRV: {
- bool IsInstContainedInBundle = BundledInsts->contains(Inst);
-
- // Return now if the target doesn't need a special inline-asm marker. Return
- // true if this is a bundled retainRV/claimRV call, which is going to be
- // erased at the end of this pass, to avoid undoing objc-arc-expand and
+ // Return true if this is a bundled retainRV/claimRV call, which is always
+ // redundant with the attachedcall in the bundle, and is going to be erased
+ // at the end of this pass. This avoids undoing objc-arc-expand and
// replacing uses of the retainRV/claimRV call's argument with its result.
- if (!RVInstMarker)
- return IsInstContainedInBundle;
-
- // The target needs a special inline-asm marker.
+ if (BundledInsts->contains(Inst))
+ return true;
- // We don't have to emit the marker if this is a bundled call since the
- // backend is responsible for emitting it. Return false to undo
- // objc-arc-expand.
- if (IsInstContainedInBundle)
+ // If this isn't a bundled call, and the target doesn't need a special
+ // inline-asm marker, we're done: return now, and undo objc-arc-expand.
+ if (!RVInstMarker)
return false;
+ // The target needs a special inline-asm marker. Insert it.
+
BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
@@ -548,7 +545,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
AA = A;
DT = D;
PA.setAA(A);
- BundledRetainClaimRVs BRV(true, RVInstMarker);
+ BundledRetainClaimRVs BRV(/*ContractPass=*/true);
BundledInsts = &BRV;
std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT);
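The reordered peephole reduces to a three-way decision. A toy model, with plain booleans standing in for BundledInsts->contains(Inst) and RVInstMarker:

#include <cassert>

enum class Action { EraseWithBundle, UndoExpand, InsertMarker };

Action classify(bool IsBundled, bool TargetHasMarker) {
  if (IsBundled)
    return Action::EraseWithBundle; // redundant with the attachedcall bundle
  if (!TargetHasMarker)
    return Action::UndoExpand;      // nothing to emit; undo objc-arc-expand
  return Action::InsertMarker;      // emit the special inline-asm marker
}

int main() {
  assert(classify(true, true) == Action::EraseWithBundle);
  assert(classify(true, false) == Action::EraseWithBundle);
  assert(classify(false, false) == Action::UndoExpand);
  assert(classify(false, true) == Action::InsertMarker);
}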
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index b6dc97f1e43f..e1a000b31cf9 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2459,7 +2459,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) {
return false;
Changed = CFGChanged = false;
- BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent()));
+ BundledRetainClaimRVs BRV(/*ContractPass=*/false);
BundledInsts = &BRV;
LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index dda1a2f08076..143a78f604fc 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -357,7 +357,7 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
// This map keeps track of all the new definitions for an instruction. This
// information is needed when restoring SSA form after cloning blocks.
-typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap;
+typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
OS << "< ";
@@ -1126,6 +1126,9 @@ private:
/// Add new value mappings to the DefMap to keep track of all new definitions
/// for a particular instruction. These will be used while updating SSA form.
void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
+ SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
+ NewDefsVector.reserve(VMap.size());
+
for (auto Entry : VMap) {
Instruction *Inst =
dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
@@ -1138,11 +1141,18 @@ private:
if (!Cloned)
continue;
- if (NewDefs.find(Inst) == NewDefs.end())
- NewDefs[Inst] = {Cloned};
- else
- NewDefs[Inst].push_back(Cloned);
+ NewDefsVector.push_back({Inst, Cloned});
}
+
+ // Sort the defs to get deterministic insertion order into NewDefs.
+ sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
+ if (LHS.first == RHS.first)
+ return LHS.second->comesBefore(RHS.second);
+ return LHS.first->comesBefore(RHS.first);
+ });
+
+ for (const auto &KV : NewDefsVector)
+ NewDefs[KV.first].push_back(KV.second);
}
/// Update the last branch of a particular cloned path to point to the correct
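The determinism fix follows a common pattern: buffer the pairs, sort by program order, then fill an insertion-ordered map. A self-contained sketch, with integer "positions" standing in for Instruction::comesBefore:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // (instruction position, clone position) pairs, gathered in hash order.
  std::vector<std::pair<int, int>> NewDefsVector = {{2, 5}, {1, 9}, {2, 3}};

  // Sort by instruction, then by clone, like the patch's comparator.
  std::sort(NewDefsVector.begin(), NewDefsVector.end());

  // MapVector analogue: iteration order is insertion order, so after the
  // sort every later walk over NewDefs is deterministic.
  std::vector<std::pair<int, std::vector<int>>> NewDefs;
  for (auto [Inst, Clone] : NewDefsVector) {
    if (NewDefs.empty() || NewDefs.back().first != Inst)
      NewDefs.push_back({Inst, {}});
    NewDefs.back().second.push_back(Clone);
  }
  assert(NewDefs.size() == 2);
  assert(NewDefs[1].second == std::vector<int>({3, 5}));
}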
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index ca19913e37ee..bf4d275e04ba 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -192,6 +192,7 @@ struct FusionCandidate {
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
+ assert(DT && "Expected non-null DT!");
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
    // the memory reads and writes. If any instructions that prevent fusion are
@@ -767,7 +768,7 @@ private:
LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
<< " iterations of the first loop. \n");
- FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
+ FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true);
if (FC0.Peeled) {
LLVM_DEBUG(dbgs() << "Done Peeling\n");
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 35ba4e2b4032..318c4c06f0f7 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1172,8 +1172,15 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
- MaybeAlign(StoreAlignment));
+ AAMDNodes AATags = TheStore->getAAMetadata();
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
+ NewCall = Builder.CreateMemSet(
+ BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1452,17 +1459,28 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ AAMDNodes AATags = TheLoad->getAAMetadata();
+ AAMDNodes StoreAATags = TheStore->getAAMetadata();
+ AATags = AATags.merge(StoreAATags);
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
- NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall = Builder.CreateMemMove(
+ StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
else
- NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
+ NumBytes, /*isVolatile=*/false, AATags.TBAA,
+ AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
@@ -1486,7 +1504,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
- NumBytes, StoreSize);
+ NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
+ AATags.NoAlias);
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
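The length passed to extendTo is the subtle part: a constant byte count extends the AA tags to exactly that range, while a runtime count falls back to -1, i.e. an unknown extent. A toy stand-in for that decision (the struct and names here are not the LLVM API):

#include <cassert>
#include <cstdint>
#include <optional>

struct Tags {
  std::optional<uint64_t> ExtentBytes; // nullopt once the extent is unknown
};

Tags extendTo(Tags T, int64_t Len) {
  if (Len < 0)
    T.ExtentBytes.reset(); // -1: conservatively unknown extent
  else
    T.ExtentBytes = static_cast<uint64_t>(Len);
  return T;
}

Tags forLoopIdiom(Tags StoreTags, std::optional<uint64_t> ConstNumBytes) {
  return ConstNumBytes ? extendTo(StoreTags, (int64_t)*ConstNumBytes)
                       : extendTo(StoreTags, -1);
}

int main() {
  Tags T{std::nullopt};
  assert(forLoopIdiom(T, 64).ExtentBytes == 64);      // constant NumBytes
  assert(!forLoopIdiom(T, std::nullopt).ExtentBytes); // runtime NumBytes
}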
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 728d63fe2847..d3fcba10c275 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -468,7 +468,7 @@ private:
LI.removeBlock(BB);
}
- DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+ detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
DTU.applyUpdates(DTUpdates);
DTUpdates.clear();
for (auto *BB : DeadLoopBlocks)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 022d9c7abc8c..9beb2281cf0f 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1281,7 +1281,7 @@ static LoopUnrollResult tryToUnrollLoop(
<< " iterations";
});
- if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
+ if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) {
simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 8f1d0181ee5b..296becb31e8f 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1339,16 +1339,21 @@ public:
// Copy load operand to new alloca.
Builder.SetInsertPoint(Copy, Copy->begin());
- AllocaInst *NewLd =
- Builder.CreateAlloca(Load->getType(), Load->getPointerAddressSpace());
- Builder.CreateMemCpy(NewLd, NewLd->getAlign(),
- Load->getPointerOperand(), Load->getAlign(),
- LoadLoc.Size.getValue());
+ auto *VT = cast<FixedVectorType>(Load->getType());
+ // Use an array type for the alloca, to avoid potentially huge alignment
+ // requirements for large vector types.
+ auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
+ AllocaInst *Alloca =
+ Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());
+ Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo());
+
+ Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(),
+ Load->getAlign(), LoadLoc.Size.getValue());
Builder.SetInsertPoint(Fusion, Fusion->begin());
PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
PHI->addIncoming(Load->getPointerOperand(), Check0);
PHI->addIncoming(Load->getPointerOperand(), Check1);
- PHI->addIncoming(NewLd, Copy);
+ PHI->addIncoming(BC, Copy);
// Adjust DT.
DTUpdates.push_back({DT->Insert, Check0, Check1});
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 2476e6c408b1..f35c9212a6f9 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -77,6 +77,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -1736,18 +1737,18 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
if (Filtered.empty()) {
    // If it has undef or poison at this point, it means there are no non-undef
// arguments, and thus, the value of the phi node must be undef.
- if (HasPoison && !HasUndef) {
- LLVM_DEBUG(
- dbgs() << "PHI Node " << *I
- << " has no non-poison arguments, valuing it as poison\n");
- return createConstantExpression(PoisonValue::get(I->getType()));
- }
if (HasUndef) {
LLVM_DEBUG(
dbgs() << "PHI Node " << *I
<< " has no non-undef arguments, valuing it as undef\n");
return createConstantExpression(UndefValue::get(I->getType()));
}
+ if (HasPoison) {
+ LLVM_DEBUG(
+ dbgs() << "PHI Node " << *I
+ << " has no non-poison arguments, valuing it as poison\n");
+ return createConstantExpression(PoisonValue::get(I->getType()));
+ }
LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
@@ -1757,6 +1758,11 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [&](Value *Arg) { return Arg == AllSameValue; })) {
+ // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef
+ // in the worst case).
+ if (HasUndef && !isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT))
+ return E;
+
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
// on the original phi node), especially in weird CFG's where some arguments
@@ -1764,8 +1770,8 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
- // We also special case undef, so that if we have an undef, we can't use the
- // common value unless it dominates the phi block.
+ // We also special case undef/poison, so that if we have an undef, we can't
+ // use the common value unless it dominates the phi block.
if (HasPoison || HasUndef) {
// If we have undef and at least one other value, this is really a
// multivalued phi, and we need to know if it's cycle free in order to
@@ -2853,14 +2859,14 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
}
// The algorithm initially places the values of the routine in the TOP
-// congruence class. The leader of TOP is the undetermined value `undef`.
+// congruence class. The leader of TOP is the undetermined value `poison`.
// When the algorithm has finished, values still in TOP are unreachable.
void NewGVN::initializeCongruenceClasses(Function &F) {
NextCongruenceNum = 0;
// Note that even though we use the live on entry def as a representative
// MemoryAccess, it is *not* the same as the actual live on entry def. We
- // have no real equivalemnt to undef for MemoryAccesses, and so we really
+ // have no real equivalent to poison for MemoryAccesses, and so we really
// should be checking whether the MemoryAccess is top if we want to know if it
// is equivalent to everything. Otherwise, what this really signifies is that
// the access "it reaches all the way back to the beginning of the function"
@@ -3031,7 +3037,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
!isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
- // If all that is left is nothing, our memoryphi is undef. We keep it as
+ // If all that is left is nothing, our memoryphi is poison. We keep it as
// InitialClass. Note: The only case this should happen is if we have at
// least one self-argument.
if (Filtered.begin() == Filtered.end()) {
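The new guard encodes a one-way refinement rule: undef may later be refined to any value, but replacing phi(undef, X) with X is only sound when X cannot be poison, since poison cannot be weakened back to undef. A toy model of the precondition:

#include <cassert>

enum class Lattice { Concrete, Undef, Poison }; // simplified value kinds

// phi(undef, X) may fold to X only when X is guaranteed not to be poison;
// this mirrors the isGuaranteedNotToBePoison check added above.
bool canFoldPhiOfUndef(Lattice CommonArg) {
  return CommonArg != Lattice::Poison;
}

int main() {
  assert(canFoldPhiOfUndef(Lattice::Concrete));
  assert(canFoldPhiOfUndef(Lattice::Undef));
  assert(!canFoldPhiOfUndef(Lattice::Poison));
}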
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3da367341d2a..b795ad3899bc 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
@@ -266,9 +267,6 @@ struct PartiallyConstructedSafepointRecord {
/// The set of values known to be live across this safepoint
StatepointLiveSetTy LiveSet;
- /// Mapping from live pointers to a base-defining-value
- MapVector<Value *, Value *> PointerToBase;
-
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
GCStatepointInst *StatepointToken;
@@ -1255,10 +1253,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
-static void
-findBasePointers(const StatepointLiveSetTy &live,
- MapVector<Value *, Value *> &PointerToBase,
- DominatorTree *DT, DefiningValueMapTy &DVCache) {
+static void findBasePointers(const StatepointLiveSetTy &live,
+ PointerToBaseTy &PointerToBase, DominatorTree *DT,
+ DefiningValueMapTy &DVCache) {
for (Value *ptr : live) {
Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
@@ -1274,8 +1271,8 @@ findBasePointers(const StatepointLiveSetTy &live,
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result) {
- MapVector<Value *, Value *> PointerToBase;
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
@@ -1290,37 +1287,27 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PointerToBase[V] = V;
}
findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
-
- if (PrintBasePointers) {
- errs() << "Base Pairs (w/o Relocation):\n";
- for (auto &Pair : PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";;
- }
- }
-
- result.PointerToBase = PointerToBase;
}
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result);
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase);
static void recomputeLiveInValues(
Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
- MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
+ PointerToBaseTy &PointerToBase) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
+ recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info,
+ PointerToBase);
}
}
@@ -1537,7 +1524,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
assert(BasePtrs.size() == LiveVariables.size());
// Then go ahead and use the builder do actually do the inserts. We insert
@@ -1626,10 +1614,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
auto &Context = Call->getContext();
auto &DL = Call->getModule()->getDataLayout();
auto GetBaseAndOffset = [&](Value *Derived) {
- assert(Result.PointerToBase.count(Derived));
+ assert(PointerToBase.count(Derived));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
- Value *Base = Result.PointerToBase.find(Derived)->second;
+ Value *Base = PointerToBase.find(Derived)->second;
Value *Base_int = Builder.CreatePtrToInt(
Base, Type::getIntNTy(Context, IntPtrSize));
Value *Derived_int = Builder.CreatePtrToInt(
@@ -1819,9 +1807,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
const auto &LiveSet = Result.LiveSet;
- const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
SmallVector<Value *, 64> BaseVec, LiveVec;
@@ -1836,7 +1824,8 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
assert(LiveVec.size() == BaseVec.size());
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
+ makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
+ PointerToBase);
}
// Helper function for the relocationViaAlloca.
@@ -2238,6 +2227,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
@@ -2248,7 +2238,7 @@ static void rematerializeLiveValues(CallBase *Call,
for (Value *LiveValue: Info.LiveSet) {
// For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.count(LiveValue));
+ assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue);
@@ -2260,9 +2250,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != Info.PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[LiveValue]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2362,7 +2352,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ InsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2373,9 +2363,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2491,10 +2481,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// site.
findLiveReferences(F, DT, ToUpdate, Records);
+ /// Global mapping from live pointers to a base-defining-value.
+ PointerToBaseTy PointerToBase;
+
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ }
+ if (PrintBasePointers) {
+ errs() << "Base Pairs (w/o Relocation):\n";
+ for (auto &Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+      errs() << "\n";
+ }
}
// The base phi insertion logic (for any safepoint) may have inserted new
@@ -2515,8 +2519,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : Info.PointerToBase)
- Bases.push_back(Pair.second);
+ for (auto *Derived : Info.LiveSet) {
+ assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
+ Bases.push_back(PointerToBase[Derived]);
+ }
insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
@@ -2524,18 +2530,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, ToUpdate, Records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase);
if (PrintBasePointers) {
- for (auto &Info : Records) {
- errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : Info.PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";
- }
+ errs() << "Base Pairs: (w/Relocation)\n";
+ for (auto Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
@@ -2547,10 +2551,12 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Note that the relocation placement code relies on this filtering for
// correctness as it expects the base to be in the liveset, which isn't true
// if the base is constant.
- for (auto &Info : Records)
- for (auto &BasePair : Info.PointerToBase)
- if (isa<Constant>(BasePair.second))
- Info.LiveSet.remove(BasePair.first);
+ for (auto &Info : Records) {
+ Info.LiveSet.remove_if([&](Value *LiveV) {
+ assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
+ return isa<Constant>(PointerToBase[LiveV]);
+ });
+ }
for (CallInst *CI : Holders)
CI->eraseFromParent();
@@ -2561,7 +2567,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2575,7 +2581,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
  // previous statepoint cannot be a live variable, thus we can remove
// the old statepoint calls as we go.)
for (size_t i = 0; i < Records.size(); i++)
- makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
+ PointerToBase);
ToUpdate.clear(); // prevent accident use of invalid calls.
@@ -2594,8 +2601,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// these live sets, and migrate to using that data structure from this point
// onward.
Info.LiveSet.clear();
- Info.PointerToBase.clear();
}
+ PointerToBase.clear();
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> Live;
@@ -3115,35 +3122,15 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &Info) {
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy Updated;
findLiveSetAtInst(Call, RevisedLivenessData, Updated);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
- Info.PointerToBase.insert({V, V});
-
-#ifndef NDEBUG
- for (auto V : Updated)
- assert(Info.PointerToBase.count(V) &&
- "Must be able to find base for live value!");
-#endif
-
- // Remove any stale base mappings - this can happen since our liveness is
- // more precise then the one inherent in the base pointer analysis.
- DenseSet<Value *> ToErase;
- for (auto KVPair : Info.PointerToBase)
- if (!Updated.count(KVPair.first))
- ToErase.insert(KVPair.first);
-
- for (auto *V : ToErase)
- Info.PointerToBase.erase(V);
-
-#ifndef NDEBUG
- for (auto KVPair : Info.PointerToBase)
- assert(Updated.count(KVPair.first) && "record for non-live value");
-#endif
+    PointerToBase.insert({V, V});
Info.LiveSet = Updated;
}
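The remove_if rewrite over the live set is the standard erase-by-predicate idiom, available in portable C++20 as std::erase_if. A small analogue, with strings standing in for derived pointers and bases:

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  std::set<std::string> LiveSet = {"d1", "d2", "d3"};
  std::map<std::string, std::string> PointerToBase = {
      {"d1", "base"}, {"d2", "CONST"}, {"d3", "base"}};

  // Drop live values whose base is a constant, asserting that every live
  // value has a recorded base, as the patch does.
  std::erase_if(LiveSet, [&](const std::string &V) {
    auto It = PointerToBase.find(V);
    assert(It != PointerToBase.end() && "Missed base for derived pointer");
    return It->second == "CONST";
  });
  assert(LiveSet == std::set<std::string>({"d1", "d3"}));
}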
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 35497ae5ed9a..8be8946702be 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ac580b4161f4..b3a445368537 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -276,6 +276,8 @@ class StructurizeCFG {
void insertConditions(bool Loops);
+ void simplifyConditions();
+
void delPhiValues(BasicBlock *From, BasicBlock *To);
void addPhiValues(BasicBlock *From, BasicBlock *To);
@@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
}
}
+/// Simplify any inverted conditions that were built by buildConditions.
+void StructurizeCFG::simplifyConditions() {
+ SmallVector<Instruction *> InstToErase;
+ for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
+ auto &Preds = I.second;
+ for (auto &J : Preds) {
+ auto &Cond = J.second;
+ Instruction *Inverted;
+ if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
+ !Cond->use_empty()) {
+ if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
+ InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
+ Cond->replaceAllUsesWith(InvertedCmp);
+ InstToErase.push_back(cast<Instruction>(Cond));
+ }
+ }
+ }
+ }
+ for (auto *I : InstToErase)
+ I->eraseFromParent();
+}
+
/// Remove all PHI values coming from "From" into "To" and remember
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
@@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
createFlow();
insertConditions(false);
insertConditions(true);
+ simplifyConditions();
setPhiValues();
simplifyAffectedPhis();
rebuildSSA();
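simplifyConditions is double-negation elimination on compares: when a one-use compare feeds a not, flip the compare's predicate and drop the not. A standalone sketch of that rewrite:

#include <cassert>

enum Pred { LT, GE, EQ, NE }; // a tiny subset of comparison predicates

Pred inversePredicate(Pred P) {
  switch (P) {
  case LT: return GE;
  case GE: return LT;
  case EQ: return NE;
  case NE: return EQ;
  }
  return P;
}

struct Cmp {
  Pred P;
  bool HasOneUse;
};

// Mirrors the m_Not(m_OneUse(m_Instruction(...))) pattern above: rewrite
// !(a < b) into (a >= b) in place when the compare has a single use.
bool simplifyNotOfCmp(bool CondIsNot, Cmp &C) {
  if (!CondIsNot || !C.HasOneUse)
    return false;
  C.P = inversePredicate(C.P);
  return true;
}

int main() {
  Cmp C{LT, true};
  assert(simplifyNotOfCmp(true, C) && C.P == GE);
}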
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d6d6b1a7fa09..15c4a64eb794 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -59,7 +59,7 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
"is followed by a block that either has a terminating "
"deoptimizing call or is terminated with an unreachable"));
-void llvm::DetatchDeadBlocks(
+void llvm::detachDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
bool KeepOneInputPHIs) {
@@ -110,7 +110,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
#endif
SmallVector<DominatorTree::UpdateType, 4> Updates;
- DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+ detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
if (DTU)
DTU->applyUpdates(Updates);
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 048e691e33cf..86413df664a0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -694,38 +694,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
VMap[OrigV] = I;
}
+ // Simplify conditional branches and switches with a constant operand. We try
+ // to prune these out when cloning, but if the simplification required
+ // looking through PHI nodes, those are only available after forming the full
+ // basic block. That may leave some here, and we still want to prune the dead
+ // code as early as possible.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ ConstantFoldTerminator(&BB);
+
+ // Some blocks may have become unreachable as a result. Find and delete them.
+ {
+ SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+ SmallVector<BasicBlock *, 16> Worklist;
+ Worklist.push_back(&*Begin);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (ReachableBlocks.insert(BB).second)
+ append_range(Worklist, successors(BB));
+ }
+
+ SmallVector<BasicBlock *, 16> UnreachableBlocks;
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ if (!ReachableBlocks.contains(&BB))
+ UnreachableBlocks.push_back(&BB);
+ DeleteDeadBlocks(UnreachableBlocks);
+ }
+
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- //
- // Do the folding before we check if the block is dead since we want code
- // like
- // bb:
- // br i1 undef, label %bb, label %bb
- // to be simplified to
- // bb:
- // br label %bb
- // before we call I->getSinglePredecessor().
- ConstantFoldTerminator(&*I);
-
- // Check if this block has become dead during inlining or other
- // simplifications. Note that the first block will appear dead, as it has
- // not yet been wired up properly.
- if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) {
- BasicBlock *DeadBB = &*I++;
- DeleteDeadBlock(DeadBB);
- continue;
- }
-
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) {
++I;
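The replacement dead-code sweep is a plain reachability walk from the cloned entry block; unlike the old per-block check, it needs no special-casing of self-loops or of the not-yet-wired first block. A standalone sketch over a toy CFG:

#include <cassert>
#include <vector>

int main() {
  // Adjacency list: block 0 -> 1, block 1 -> 1 (self loop), block 2 -> 1.
  // Block 2 lost its only predecessor when a terminator was folded.
  std::vector<std::vector<int>> Succs = {{1}, {1}, {1}};

  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Worklist = {0}; // the cloned entry block
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (Reachable[BB])
      continue;
    Reachable[BB] = true;
    for (int S : Succs[BB])
      Worklist.push_back(S);
  }

  std::vector<int> UnreachableBlocks;
  for (int BB = 0; BB != (int)Succs.size(); ++BB)
    if (!Reachable[BB])
      UnreachableBlocks.push_back(BB); // fed to DeleteDeadBlocks in the patch
  assert(UnreachableBlocks == std::vector<int>({2}));
}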
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 24cd5747c5a4..cec159f6a448 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -857,8 +858,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
(ParamTy.size() + AggParamTy.size()) ==
(inputs.size() + outputs.size()) &&
"Number of scalar and aggregate params does not match inputs, outputs");
- assert(StructValues.empty() ||
- AggregateArgs && "Expeced StructValues only with AggregateArgs set");
+ assert((StructValues.empty() || AggregateArgs) &&
+         "Expected StructValues only with AggregateArgs set");
// Concatenate scalar and aggregate params in ParamTy.
size_t NumScalarParams = ParamTy.size();
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index f8ec8c6ad426..c1c5f5cc879f 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -65,15 +65,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType()))
- return true;
-
- // FIXME: Do we need to add constexpr selects to VisitedUsers?
- if (analyzeGlobalAux(CE, GS, VisitedUsers))
- return true;
+ if (const Constant *C = dyn_cast<Constant>(UR)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE && isa<PointerType>(CE->getType())) {
+ // Recursively analyze pointer-typed constant expressions.
+ // FIXME: Do we need to add constexpr selects to VisitedUsers?
+ if (analyzeGlobalAux(CE, GS, VisitedUsers))
+ return true;
+ } else {
+ // Ignore dead constant users.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ }
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
@@ -169,10 +172,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(UR)) {
- // We might have a dead and dangling constant hanging off of here.
- if (!isSafeToDestroyConstant(C))
- return true;
} else {
// Otherwise must be some other user.
return true;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index c9f872f5b7e1..923bcc781e47 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -671,12 +672,9 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
// edge from this block.
SmallVector<Value *, 8> UnwindDestPHIValues;
BasicBlock *InvokeBB = II->getParent();
- for (Instruction &I : *UnwindDest) {
+ for (PHINode &PHI : UnwindDest->phis()) {
// Save the value to use for this edge.
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
}
// Add incoming-PHI values to the unwind destination block for the given basic
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 9f33d2f82732..9a10535c9310 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 92333408aaef..5b66da1e7082 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
bool PreserveLCSSA) {
assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
@@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// routes which can lead to the exit: we can reach it from the peeled
// iterations too.
DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
- if (DT) {
- for (auto *BB : L->blocks()) {
- auto *BBDomNode = DT->getNode(BB);
- SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- for (auto *ChildDomNode : BBDomNode->children()) {
- auto *ChildBB = ChildDomNode->getBlock();
- if (!L->contains(ChildBB))
- ChildrenToUpdate.push_back(ChildBB);
- }
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
- for (auto *ChildBB : ChildrenToUpdate)
- NonLoopBlocksIDom[ChildBB] = NewIDom;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT.getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
}
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
}
Function *F = Header->getParent();
@@ -822,11 +820,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// If (cond) goto Header
// Exit:
- BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
BasicBlock *InsertBot =
- SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+ SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
BasicBlock *NewPreHeader =
- SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertTop->setName(Header->getName() + ".peel.begin");
InsertBot->setName(Header->getName() + ".peel.next");
@@ -852,23 +850,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ValueToValueMapTy VMap;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes);
// Remap to use values from the current iteration instead of the
// previous one.
remapInstructionsInBlocks(NewBlocks, VMap);
- if (DT) {
- // Update IDoms of the blocks reachable through exits.
- if (Iter == 0)
- for (auto BBIDom : NonLoopBlocksIDom)
- DT->changeImmediateDominator(BBIDom.first,
- cast<BasicBlock>(LVMap[BBIDom.second]));
+ // Update IDoms of the blocks reachable through exits.
+ if (Iter == 0)
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT.changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- }
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
@@ -877,7 +873,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
- InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertBot->setName(Header->getName() + ".peel.next");
F->getBasicBlockList().splice(InsertTop->getIterator(),
@@ -912,10 +908,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SE->forgetTopmostLoop(L);
  // Finally, DomTree must be correct.
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
// FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7c9ab7f6ca2c..d6a6be2762c7 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -264,3 +264,16 @@ void VFABI::setVectorVariantNames(
CI->addFnAttr(
Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
}
+
+void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
+ StringRef SectionName) {
+ // Embed the buffer into the module.
+ Constant *ModuleConstant = ConstantDataArray::get(
+ M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
+ GlobalVariable *GV = new GlobalVariable(
+ M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
+ ModuleConstant, "llvm.embedded.object");
+ GV->setSection(SectionName);
+
+ appendToCompilerUsed(M, GV);
+}
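A minimal caller sketch for the new helper, assuming an LLVM development setup; the section name is a placeholder, not something this patch prescribes:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <memory>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Any raw bytes work; a short string stands in for an object blob here.
  std::unique_ptr<MemoryBuffer> Buf =
      MemoryBuffer::getMemBuffer("payload-bytes", "embedded",
                                 /*RequiresNullTerminator=*/false);
  embedBufferInModule(M, Buf->getMemBufferRef(), ".hypothetical.section");
  // M now carries the private global @llvm.embedded.object in that section,
  // kept alive through llvm.compiler.used.
}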
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index 7083789267d9..deaee467531d 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b35ab57e0d87..01b433b4782a 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,13 +25,13 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 3ca36a1cad91..43eb5c87acee 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -16,6 +16,7 @@
#include "llvm-c/Transforms/Utils.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index bbe6b3dc23b3..637181722f63 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -2,6 +2,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d11f4146b590..3290439ecd07 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -632,13 +632,6 @@ protected:
Instruction *EntryVal, VPValue *Def,
VPTransformState &State);
- /// Returns true if an instruction \p I should be scalarized instead of
- /// vectorized for the chosen vectorization factor.
- bool shouldScalarizeInstruction(Instruction *I) const;
-
- /// Returns true if we should generate a scalar version of \p IV.
- bool needsScalarInduction(Instruction *IV) const;
-
/// Returns (and creates if needed) the original loop trip count.
Value *getOrCreateTripCount(Loop *NewLoop);
@@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
-bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
- return Cost->isScalarAfterVectorization(I, VF) ||
- Cost->isProfitableToScalarize(I, VF);
-}
-
-bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
- if (shouldScalarizeInstruction(IV))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
- };
- return llvm::any_of(IV->users(), isScalarInst);
-}
-
void InnerLoopVectorizer::widenIntOrFpInduction(
PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
Value *CanonicalIV) {
@@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return ScalarIV;
};
- // Create the vector values from the scalar IV, in the absence of creating a
- // vector IV.
- auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
- Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx;
- if (Step->getType()->isFloatingPointTy())
- StartIdx =
- getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
- else
- StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
- Value *EntryPart =
- getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
- State.set(Def, EntryPart, Part);
- if (Trunc)
- addMetadata(EntryPart, Trunc);
- }
- };
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return;
}
- // Determine if we want a scalar version of the induction variable. This is
- // true if the induction variable itself is not widened, or if it has at
- // least one user in the loop that is not widened.
- auto NeedsScalarIV = needsScalarInduction(EntryVal);
- if (!NeedsScalarIV) {
+ // Create a new independent vector induction variable, if one is needed.
+ if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- return;
- }
- // Try to create a new independent vector induction variable. If we can't
- // create the phi node, we will splat the scalar induction variable in each
- // loop iteration.
- if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- Value *ScalarIV = CreateScalarIV(Step);
+ if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
// the number of instructions in the loop in the common case prior to
// InstCombine. We will be trading one vector extract for each scalar step.
+ Value *ScalarIV = CreateScalarIV(Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
- return;
}
-
- // All IV users are scalar instructions, so only emit a scalar IV, not a
- // vectorised IV. Except when we tail-fold, then the splat IV feeds the
- // predicate used by the masked loads/stores.
- Value *ScalarIV = CreateScalarIV(Step);
- if (!Cost->isScalarEpilogueAllowed())
- CreateSplatIV(ScalarIV, Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
// Determine the number of scalars we need to generate for each unroll
- // iteration. If EntryVal is uniform, we only need to generate the first
- // lane. Otherwise, we generate all VF values.
- bool IsUniform =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
+ // iteration.
+ bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
+ unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
Type *VecIVTy = nullptr;
Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
VecIVTy = VectorType::get(ScalarIVTy, State.VF);
UnitStepVec =
Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
@@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (ScalarIVTy->isFloatingPointTy())
@@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
+ bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
@@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// consider interleaving beneficial (eg. MVE).
if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
return false;
- if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+ // FIXME: We should consider changing the threshold for scalable
+ // vectors to take VScaleForTuning into account.
+ if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
return true;
return false;
}
@@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (MainLoopVF.isScalable())
- LLVM_DEBUG(
- dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
- "yet supported. Converting to fixed-width (VF="
- << FixedMainLoopVF << ") instead\n");
-
- if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
}
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
- (Result.Width.getFixedValue() == 1 ||
- isMoreProfitable(NextVF, Result)) &&
+ if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
- << Result.Width.getFixedValue() << "\n";);
+ << Result.Width << "\n";);
return Result;
}
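
A note on the two accessors these hunks swap, with a minimal sketch (not part
of the patch): getFixedValue() asserts that the count is not scalable, which
is why the profitability threshold and the debug print now go through
getKnownMinValue() and ElementCount's own stream printer instead.

    #include "llvm/Support/TypeSize.h"
    using llvm::ElementCount;

    void elementCountSketch() {
      ElementCount Fixed = ElementCount::getFixed(8);       // exactly 8 lanes
      ElementCount Scalable = ElementCount::getScalable(4); // vscale x 4 lanes
      (void)Fixed.getFixedValue();       // fine: 8
      (void)Scalable.getKnownMinValue(); // fine: 4, the known minimum
      // Scalable.getFixedValue() would assert: the count is not fixed.
    }
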
@@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
Mask, Consecutive, Reverse);
}
-VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
- ArrayRef<VPValue *> Operands) const {
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
+ VPValue *Start, const InductionDescriptor &IndDesc,
+ LoopVectorizationCostModel &CM, Loop &OrigLoop,
+ VFRange &Range) {
+ // Returns true if an instruction \p I should be scalarized instead of
+ // vectorized for the chosen vectorization factor.
+ auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+ return CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF);
+ };
+
+ bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ // Returns true if we should generate a scalar version of the induction.
+ if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
+ return true;
+ auto isScalarInst = [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
+ };
+ return any_of(PhiOrTrunc->users(), isScalarInst);
+ },
+ Range);
+ bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+ },
+ Range);
+ assert(IndDesc.getStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
+ NeedsScalarIV, !NeedsScalarIVOnly);
+ }
+ assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ !NeedsScalarIVOnly);
+}
+
+VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
- assert(II->getStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
- }
+ if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
+ return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
+ Range);
return nullptr;
}
@@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
+ return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
}
return nullptr;
}
@@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;
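
The net effect of the LoopVectorize.cpp changes above: the per-VF choice
between a scalar and a vector induction, previously recomputed from the cost
model during codegen (needsScalarInduction), is now computed once while the
VPlan is built and stored on the recipe as NeedsScalarIV / NeedsVectorIV,
where NeedsVectorIV is the negation of "the IV itself is scalarized for every
VF in the range". A rough sketch of the helper that makes the per-VF query
range-safe, assuming the usual semantics of
LoopVectorizationPlanner::getDecisionAndClampRange:

    // Evaluates the predicate at the start of the VF range and shrinks
    // Range.End so the decision is uniform for every VF that remains;
    // callers can then bake a single boolean into a recipe for the whole
    // clamped range.
    bool getDecisionAndClampRange(const std::function<bool(ElementCount)> &P,
                                  VFRange &Range) {
      bool Decision = P(Range.Start);
      for (ElementCount VF = Range.Start * 2;
           ElementCount::isKnownLT(VF, Range.End); VF *= 2)
        if (P(VF) != Decision) {
          Range.End = VF;
          break;
        }
      return Decision;
    }
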
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101..15b349f53fd9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) {
return true;
}
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+ unsigned BaseIndex = 0);
+
+/// Checks if the provided operands of two cmp instructions are compatible:
+/// matching constants, same-opcode instructions, or plain non-instruction values.
+static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
+ Value *Op1) {
+ return (isConstant(BaseOp0) && isConstant(Op0)) ||
+ (isConstant(BaseOp1) && isConstant(Op1)) ||
+ (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
+ !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
+ getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+ getSameOpcode({BaseOp1, Op1}).getOpcode();
+}
+
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState: the opcode with which we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
- unsigned BaseIndex = 0) {
+ unsigned BaseIndex) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
+ bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
+ CmpInst::Predicate BasePred =
+ IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
@@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
continue;
}
}
+ } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
+ auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
+ auto *Inst = cast<Instruction>(VL[Cnt]);
+ Type *Ty0 = BaseInst->getOperand(0)->getType();
+ Type *Ty1 = Inst->getOperand(0)->getType();
+ if (Ty0 == Ty1) {
+ Value *BaseOp0 = BaseInst->getOperand(0);
+ Value *BaseOp1 = BaseInst->getOperand(1);
+ Value *Op0 = Inst->getOperand(0);
+ Value *Op1 = Inst->getOperand(1);
+ CmpInst::Predicate CurrentPred =
+ cast<CmpInst>(VL[Cnt])->getPredicate();
+ CmpInst::Predicate SwappedCurrentPred =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ // Check for compatible operands. If the corresponding operands are not
+ // compatible, we need to perform alternate vectorization.
+ if (InstOpcode == Opcode) {
+ if (BasePred == CurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+ continue;
+ if (BasePred == SwappedCurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+ continue;
+ if (E == 2 &&
+ (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
+ continue;
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ Value *AltOp0 = AltInst->getOperand(0);
+ Value *AltOp1 = AltInst->getOperand(1);
+ // Check if operands are compatible with alternate operands.
+ if (AltPred == CurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+ continue;
+ if (AltPred == SwappedCurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+ continue;
+ }
+ if (BaseIndex == AltIndex) {
+ assert(isValidForAlternation(Opcode) &&
+ isValidForAlternation(InstOpcode) &&
+ "Cast isn't safe for alternation, logic needs to be updated!");
+ AltIndex = Cnt;
+ continue;
+ }
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
+ AltPred == CurrentPred || AltPred == SwappedCurrentPred)
+ continue;
+ }
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
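
One C++ detail the new forward declaration of getSameOpcode depends on: the
default argument for BaseIndex moves to the declaration because a default may
be specified on only one declaration in a scope, so the definition that
follows must omit it. A minimal standalone reminder of the rule:

    // The declaration owns the default; the definition must not repeat it.
    static int scaled(int X, int Factor = 2);
    static int scaled(int X, int Factor) { return X * Factor; }
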
@@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+ // Do the analysis for each tree entry only once, otherwise the order of
+ // the same node may be considered several times, though it might not be
+ // profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
+ if (!VisitedOps.insert(OpTE).second)
+ continue;
if (!OpTE->ReuseShuffleIndices.empty() ||
(IgnoreReorder && OpTE == VectorizableTree.front().get()))
continue;
@@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
- if (VisitedOps.insert(OpTE).second)
- OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
- OpTE->UserTreeIndices.size();
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
--OrdersUses[{}];
}
@@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
- if (isa<BinaryOperator>(VL0)) {
+ auto *CI = dyn_cast<CmpInst>(VL0);
+ if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ if (!CI || all_of(VL, [](Value *V) {
+ return cast<CmpInst>(V)->isCommutative();
+ })) {
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ } else {
+ CmpInst::Predicate P0 = CI->getPredicate();
+ CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+ CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+ Value *BaseOp0 = VL0->getOperand(0);
+ Value *BaseOp1 = VL0->getOperand(1);
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ if ((P0 == CurrentPred &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+ (P0 == CurrentPredSwapped &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+ std::swap(LHS, RHS);
+ } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+ std::swap(LHS, RHS);
+ }
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
+ }
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
@@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
+ Builder.getInt1Ty(),
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTI->getCmpSelInstrCost(
+ E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+ E->getAltOp());
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have the same or swapped predicate as the main cmps,
+ // but a different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask);
@@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
@@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
+ auto *AltCI = cast<CmpInst>(E->getAltOp());
+ CmpInst::Predicate AltPred = AltCI->getPredicate();
+ unsigned AltIdx =
+ std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
+ if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
+ AltPred = CmpInst::getSwappedPredicate(AltPred);
+ V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have the same or swapped predicate as the main cmps,
+ // but a different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
@@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (ScheduleData *BundleMember = picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *pickedInst = BundleMember->Inst;
- if (pickedInst->getNextNode() != LastScheduledInst) {
- BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
- pickedInst);
- }
+ if (pickedInst->getNextNode() != LastScheduledInst)
+ pickedInst->moveBefore(LastScheduledInst);
LastScheduledInst = pickedInst;
}
@@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
- R.reorderBottomToTop();
+ R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
R.buildExternalUses();
R.computeMinimumValueSizes();
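
The cmp handling added above treats two compares as one "opcode" when their
predicates match directly or after swapping operands. A minimal sketch of the
predicate identity involved (LLVM's CmpInst API; not part of the patch):

    #include "llvm/IR/InstrTypes.h"
    #include <cassert>
    using llvm::CmpInst;

    void swappedPredicateSketch() {
      // icmp sgt %a, %b is the same comparison as icmp slt %b, %a, which is
      // why getSameOpcode and areCompatibleCmpOps also try the swapped
      // predicate before giving up on a cmp pair.
      assert(CmpInst::getSwappedPredicate(CmpInst::ICMP_SGT) ==
             CmpInst::ICMP_SLT);
    }
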
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index e5dded3c0f1e..8822c0004eb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -75,7 +75,8 @@ class VPRecipeBuilder {
/// Check if an induction recipe should be constructed for \p Phi. If so, build and
/// return it. If not, return null.
VPWidenIntOrFpInductionRecipe *
- tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const;
+ tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
+ VFRange &Range) const;
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a96c122db2a9..342d4a074e10 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1649,3 +1649,9 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
for (VPValue *Def : Recipe.definedValues())
assignSlot(Def);
}
+
+bool vputils::onlyFirstLaneUsed(VPValue *Def) {
+ return all_of(Def->users(), [Def](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
+ });
+}
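
vputils::onlyFirstLaneUsed is a plan-level query built from the recipe-level
overrides added below: a value needs all of its lanes only if some user
recipe does. For example, the memory-recipe override reports true just for
the address operand of a consecutive access, and the canonical-IV-phi
override reports true unconditionally, so a canonical IV whose only users are
consecutive loads and stores counts as "first lane only".
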
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 824440f98a8b..bcaabca692cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -759,6 +759,14 @@ public:
bool mayReadOrWriteMemory() const {
return mayReadFromMemory() || mayWriteToMemory();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ /// Conservatively returns false.
+ virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return false;
+ }
};
inline bool VPUser::classof(const VPDef *Def) {
@@ -893,6 +901,24 @@ public:
/// Set the fast-math flags.
void setFastMathFlags(FastMathFlags FMFNew);
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ if (getOperand(0) != Op)
+ return false;
+ switch (getOpcode()) {
+ default:
+ return false;
+ case VPInstruction::ActiveLaneMask:
+ case VPInstruction::CanonicalIVIncrement:
+ case VPInstruction::CanonicalIVIncrementNUW:
+ case VPInstruction::BranchOnCount:
+ return true;
+ }
+ llvm_unreachable("switch should return");
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1027,18 +1053,24 @@ public:
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
PHINode *IV;
const InductionDescriptor &IndDesc;
+ bool NeedsScalarIV;
+ bool NeedsVectorIV;
public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
- const InductionDescriptor &IndDesc)
+ const InductionDescriptor &IndDesc,
+ bool NeedsScalarIV, bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
const InductionDescriptor &IndDesc,
- TruncInst *Trunc)
+ TruncInst *Trunc, bool NeedsScalarIV,
+ bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -1082,6 +1114,12 @@ public:
const TruncInst *TruncI = getTruncInst();
return TruncI ? TruncI->getType() : IV->getType();
}
+
+ /// Returns true if a scalar phi needs to be created for the induction.
+ bool needsScalarIV() const { return NeedsScalarIV; }
+
+ /// Returns true if a vector phi needs to be created for the induction.
+ bool needsVectorIV() const { return NeedsVectorIV; }
};
/// A pure virtual base class for all recipes modeling header phis, including
@@ -1318,6 +1356,17 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Recursing through Blend recipes only; the recursion must terminate at
+ // header phis at the latest.
+ return all_of(users(), [this](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this);
+ });
+ }
};
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
@@ -1495,6 +1544,13 @@ public:
bool isPacked() const { return AlsoPack; }
bool isPredicated() const { return IsPredicated; }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return isUniform();
+ }
};
/// A recipe for generating conditional branches on the bits of a mask.
@@ -1651,6 +1707,16 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+
+ // Widened, consecutive memory operations only demand the first lane of
+ // their address.
+ return Op == getAddr() && isConsecutive();
+ }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
@@ -1686,6 +1752,13 @@ public:
const Type *getScalarType() const {
return getOperand(0)->getLiveInIRValue()->getType();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A Recipe for widening the canonical induction variable of the vector loop.
@@ -2766,6 +2839,14 @@ public:
/// Return true if all visited instruction can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
+
+namespace vputils {
+
+/// Returns true if only the first lane of \p Def is used.
+bool onlyFirstLaneUsed(VPValue *Def);
+
+} // end namespace vputils
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fb5f3d428189..70ce773a8a85 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -47,7 +47,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II);
+ NewRecipe =
+ new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -341,10 +342,16 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
- // If the induction recipe is canonical and the types match, use it
- // directly.
- if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
- WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+ if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
+ WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
+ continue;
+
+ // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
+ // everything WidenNewIV's users need: that is, if WidenOriginalIV will
+ // generate a vector phi, or if all users of WidenNewIV demand only the
+ // first lane.
+ if (WidenOriginalIV->needsVectorIV() ||
+ vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 0296a995ad29..010ca28fc237 100644
--- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index f7b29b884027..8842162f5216 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -90,6 +90,7 @@ OPTIONS:
--rsp-quoting - quoting style for response files
=posix - posix
=windows - windows
+ --thin - create a thin archive
--version - print the version and exit
@<file> - read options from <file>
@@ -118,7 +119,7 @@ MODIFIERS:
[P] - use full names when matching (implied for thin archives)
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
- [T] - create a thin archive
+ [T] - deprecated, use --thin instead
[u] - update only [files] newer than archive contents
[U] - use actual timestamps and uids/gids
[v] - be verbose about actions taken
@@ -390,8 +391,6 @@ static ArchiveOperation parseCommandLine() {
break;
case 'T':
Thin = true;
- // Thin archives store path names, so P should be forced.
- CompareFullPath = true;
break;
case 'L':
AddLibrary = true;
@@ -407,6 +406,10 @@ static ArchiveOperation parseCommandLine() {
}
}
+ // Thin archives store path names, so P should be forced.
+ if (Thin)
+ CompareFullPath = true;
+
// At this point, the next thing on the command line must be
// the archive name.
getArchive();
@@ -965,6 +968,8 @@ static void createSymbolTable(object::Archive *OldArchive) {
if (OldArchive->hasSymbolTable())
return;
+ if (OldArchive->isThin())
+ Thin = true;
performWriteOperation(CreateSymTab, OldArchive, nullptr, nullptr);
}
@@ -1202,6 +1207,11 @@ static int ar_main(int argc, char **argv) {
continue;
}
+ if (strcmp(*ArgIt, "--thin") == 0) {
+ Thin = true;
+ continue;
+ }
+
Match = matchFlagWithArg("format", ArgIt, Argv);
if (Match) {
FormatType = StringSwitch<Format>(Match)
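
The --thin flag added above replaces the T modifier without changing
behavior. Assuming the usual llvm-ar invocation shape, creating a thin
archive now reads:

    llvm-ar --thin rcs libfoo.a foo.o    # new spelling
    llvm-ar rcsT libfoo.a foo.o          # deprecated, still accepted

Both force full-path name matching (P), since thin archives store path names;
the hunk above moves that forcing after the option loop so it applies however
Thin was set.
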
diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index 307a7f9b7999..11dad0d9c369 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -106,7 +106,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
else
// Otherwise, with an empty Module but non-empty Index, we write a
// combined index.
- WriteIndexToFile(*IndexToWrite, Out->os());
+ writeIndexToFile(*IndexToWrite, Out->os());
}
// Declare success.
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp
index cb1c4116ff19..3cdef529504e 100644
--- a/llvm/tools/llvm-extract/llvm-extract.cpp
+++ b/llvm/tools/llvm-extract/llvm-extract.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index d78c4dff7db4..8fc3a5d68500 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -497,7 +497,7 @@ static void createCombinedModuleSummaryIndex() {
raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'");
- WriteIndexToFile(CombinedIndex, OS);
+ writeIndexToFile(CombinedIndex, OS);
OS.close();
}
@@ -660,7 +660,7 @@ private:
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + "'");
- WriteIndexToFile(*CombinedIndex, OS);
+ writeIndexToFile(*CombinedIndex, OS);
}
/// Load the combined index from disk, then compute and generate
@@ -698,7 +698,7 @@ private:
std::error_code EC;
raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
- WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
+ writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
}
}
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 3cac77411845..6b731abd9ed9 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -417,6 +417,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
case MachO::LC_SUB_UMBRELLA:
case MachO::LC_SUB_CLIENT:
case MachO::LC_SUB_LIBRARY:
+ case MachO::LC_LINKER_OPTION:
// Nothing to update.
break;
default:
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 5e58c1365d80..6000460d3c23 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -255,9 +255,7 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
auto Reader = std::move(ReaderOrErr.get());
- bool IsIRProfile = Reader->isIRLevelProfile();
- bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
- if (Error E = WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
consumeError(std::move(E));
WC->Errors.emplace_back(
make_error<StringError>(
@@ -266,7 +264,6 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
Filename);
return;
}
- WC->Writer.setInstrEntryBBEnabled(Reader->instrEntryBBEnabled());
for (auto &I : *Reader) {
if (Remapper)
@@ -2095,7 +2092,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
bool ShowAllFunctions, bool ShowCS,
uint64_t ValueCutoff, bool OnlyListBelow,
const std::string &ShowFunction, bool TextFormat,
- bool ShowBinaryIds, raw_fd_ostream &OS) {
+ bool ShowBinaryIds, bool ShowCovered,
+ raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -2152,6 +2150,13 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
assert(Func.Counts.size() > 0 && "function missing entry counter");
Builder.addRecord(Func);
+ if (ShowCovered) {
+ if (std::any_of(Func.Counts.begin(), Func.Counts.end(),
+ [](uint64_t C) { return C; }))
+ OS << Func.Name << "\n";
+ continue;
+ }
+
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
@@ -2228,7 +2233,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (Reader->hasError())
exitWithError(Reader->getError(), Filename);
- if (TextFormat)
+ if (TextFormat || ShowCovered)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
bool IsIR = Reader->isIRLevelProfile();
@@ -2579,6 +2584,9 @@ static int show_main(int argc, const char *argv[]) {
"debug-info", cl::init(""),
cl::desc("Read and extract profile metadata from debug info and show "
"the functions it found."));
+ cl::opt<bool> ShowCovered(
+ "covered", cl::init(false),
+ cl::desc("Show only the functions that have been executed."));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
@@ -2610,7 +2618,7 @@ static int show_main(int argc, const char *argv[]) {
Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
- TextFormat, ShowBinaryIds, OS);
+ TextFormat, ShowBinaryIds, ShowCovered, OS);
if (ProfileKind == sample)
return showSampleProfile(Filename, ShowCounts, TopNFunctions,
ShowAllFunctions, ShowDetailedSummary,
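
With the new flag wired through showInstrProfile, a hedged usage sketch:
`llvm-profdata show --covered foo.profdata` prints only the names of
functions with at least one nonzero counter, one per line, and suppresses the
usual summary output via the early return taken when ShowCovered is set.
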
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index cfb618117d2b..04a67225401f 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -6393,6 +6393,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_MACH));
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
unsigned(ELF::EF_AMDGPU_MACH),
diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp
index d76332d1ba36..b4d726016437 100644
--- a/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -183,7 +183,10 @@ void WasmDumper::printSectionHeaders() {
W.printNumber("Offset", Seg.Offset.Value.Int32);
else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST)
W.printNumber("Offset", Seg.Offset.Value.Int64);
- else
+ else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) {
+ ListScope Group(W, "Offset");
+ W.printNumber("Global", Seg.Offset.Value.Global);
+ } else
llvm_unreachable("unknown init expr opcode");
}
break;
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index 941b529da9b2..9135d60fdf92 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -34,14 +34,15 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 4de619df5b5f..a1f8f4809d5f 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -4645,39 +4645,33 @@ static void GenerateVariantsOf(TreePatternNodePtr N,
// If this node is commutative, consider the commuted order.
bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP);
if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) {
- assert((N->getNumChildren()>=2 || isCommIntrinsic) &&
+ unsigned Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id.
+ assert(N->getNumChildren() >= (2 + Skip) &&
"Commutative but doesn't have 2 children!");
- // Don't count children which are actually register references.
- unsigned NC = 0;
- for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+ // Don't allow commuting children which are actually register references.
+ bool NoRegisters = true;
+ unsigned i = 0 + Skip;
+ unsigned e = 2 + Skip;
+ for (; i != e; ++i) {
TreePatternNode *Child = N->getChild(i);
if (Child->isLeaf())
if (DefInit *DI = dyn_cast<DefInit>(Child->getLeafValue())) {
Record *RR = DI->getDef();
if (RR->isSubClassOf("Register"))
- continue;
+ NoRegisters = false;
}
- NC++;
}
// Consider the commuted order.
- if (isCommIntrinsic) {
- // Commutative intrinsic. First operand is the intrinsic id, 2nd and 3rd
- // operands are the commutative operands, and there might be more operands
- // after those.
- assert(NC >= 3 &&
- "Commutative intrinsic should have at least 3 children!");
- std::vector<std::vector<TreePatternNodePtr>> Variants;
- Variants.push_back(std::move(ChildVariants[0])); // Intrinsic id.
- Variants.push_back(std::move(ChildVariants[2]));
- Variants.push_back(std::move(ChildVariants[1]));
- for (unsigned i = 3; i != NC; ++i)
- Variants.push_back(std::move(ChildVariants[i]));
- CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
- } else if (NC == N->getNumChildren()) {
+ if (NoRegisters) {
std::vector<std::vector<TreePatternNodePtr>> Variants;
- Variants.push_back(std::move(ChildVariants[1]));
- Variants.push_back(std::move(ChildVariants[0]));
- for (unsigned i = 2; i != NC; ++i)
+ unsigned i = 0;
+ if (isCommIntrinsic)
+ Variants.push_back(std::move(ChildVariants[i++])); // Intrinsic id.
+ Variants.push_back(std::move(ChildVariants[i + 1]));
+ Variants.push_back(std::move(ChildVariants[i]));
+ i += 2;
+ // Remaining operands are not commuted.
+ for (; i != N->getNumChildren(); ++i)
Variants.push_back(std::move(ChildVariants[i]));
CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
}
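
A worked example of what the rewritten loop produces, under the convention in
the code above that a commutative intrinsic's operand 0 is the intrinsic id:
for child variants {Id, A, B, C} the commuted set is {Id, B, A, C}; for a
plain commutative node {A, B, C} it is {B, A, C}. If either of the two
commutable children is a leaf register reference, NoRegisters is false and no
commuted variant is emitted; this replaces the old NC counting, which
inspected every child rather than just the two being commuted.
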
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 7c1c37f7b370..e47bda725a17 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -521,6 +521,15 @@ void CodeGenSchedModels::collectProcModels() {
RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
llvm::sort(ProcRecords, LessRecordFieldName());
+ // Check for duplicated names.
+ auto I = std::adjacent_find(ProcRecords.begin(), ProcRecords.end(),
+ [](const Record *Rec1, const Record *Rec2) {
+ return Rec1->getValueAsString("Name") == Rec2->getValueAsString("Name");
+ });
+ if (I != ProcRecords.end())
+ PrintFatalError((*I)->getLoc(), "Duplicate processor name " +
+ (*I)->getValueAsString("Name"));
+
// Reserve space because we can. Reallocation would be ok.
ProcModels.reserve(ProcRecords.size()+1);
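
The duplicate check relies on ProcRecords having just been sorted by the Name
field: std::adjacent_find only compares neighbours, so the preceding sort is
what makes it a complete duplicate detector. A minimal standalone sketch:

    #include <algorithm>
    #include <string>
    #include <vector>

    bool hasDuplicate(std::vector<std::string> Names) {
      std::sort(Names.begin(), Names.end());
      // After sorting, any duplicates are adjacent.
      return std::adjacent_find(Names.begin(), Names.end()) != Names.end();
    }
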
@@ -1973,7 +1982,6 @@ void CodeGenSchedModels::collectProcResources() {
void CodeGenSchedModels::checkCompleteness() {
bool Complete = true;
- bool HadCompleteModel = false;
for (const CodeGenProcModel &ProcModel : procModels()) {
const bool HasItineraries = ProcModel.hasItineraries();
if (!ProcModel.ModelDef->getValueAsBit("CompleteModel"))
@@ -1985,7 +1993,7 @@ void CodeGenSchedModels::checkCompleteness() {
continue;
unsigned SCIdx = getSchedClassIdx(*Inst);
if (!SCIdx) {
- if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) {
+ if (Inst->TheDef->isValueUnset("SchedRW")) {
PrintError(Inst->TheDef->getLoc(),
"No schedule information for instruction '" +
Inst->TheDef->getName() + "' in SchedMachineModel '" +
@@ -2013,7 +2021,6 @@ void CodeGenSchedModels::checkCompleteness() {
Complete = false;
}
}
- HadCompleteModel = true;
}
if (!Complete) {
errs() << "\n\nIncomplete schedule models found.\n"
diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index 94ad6ee285d4..1fd85939e74e 100644
--- a/llvm/utils/TableGen/CompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -72,7 +72,6 @@
#include "CodeGenTarget.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp
index 63a9ed682d4f..0dea1ef00e4b 100644
--- a/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -11,21 +11,21 @@
///
//===----------------------------------------------------------------------===//
+#include "CodeGenTarget.h"
+#include "GlobalISel/CodeExpander.h"
+#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/GIMatchDag.h"
+#include "GlobalISel/GIMatchDagPredicate.h"
+#include "GlobalISel/GIMatchTree.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenTarget.h"
-#include "GlobalISel/CodeExpander.h"
-#include "GlobalISel/CodeExpansions.h"
-#include "GlobalISel/GIMatchDag.h"
-#include "GlobalISel/GIMatchTree.h"
#include <cstdint>
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
index 3ebb293f466e..42b4aabf2755 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
@@ -12,7 +12,6 @@
#include "CodeExpander.h"
#include "CodeExpansions.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.h b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
index bd6946de5925..1291eb1ad940 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.h
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
@@ -15,10 +15,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/SMLoc.h"
namespace llvm {
class CodeExpansions;
+class SMLoc;
class raw_ostream;
/// Emit the given code with all '${foo}' placeholders expanded to their
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
index 37570648cad1..4c3c610aff74 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
@@ -16,7 +16,6 @@
#include "GIMatchDagPredicateDependencyEdge.h"
namespace llvm {
-class GIMatchDag;
/// This class manages lifetimes for data associated with the GIMatchDag object.
class GIMatchDagContext {
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
index e59cb3aae49a..796479467df7 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
@@ -8,6 +8,7 @@
#include "GIMatchDagEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
index 0c39b50442b4..5e60448b30c1 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
@@ -9,11 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
-#include "GIMatchDagOperands.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
class GIMatchDag;
+class GIMatchDagOperandList;
/// Represents an instruction in the match DAG. This object knows very little
/// about the actual instruction to be matched as the bulk of that is in
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
index 1aca2f9dc135..6a9e33ac515e 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
@@ -10,8 +10,8 @@
#include "llvm/TableGen/Record.h"
-#include "GIMatchDagOperands.h"
#include "../CodeGenInstruction.h"
+#include "GIMatchDag.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
index 9b030d6edb13..08e541b76a5a 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
@@ -9,8 +9,12 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "GIMatchDag.h"
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#include "llvm/Support/raw_ostream.h"
+#endif
namespace llvm {
class CodeExpansions;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
index 2e804de1cd4e..921cbaf9c408 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
@@ -9,6 +9,7 @@
#include "GIMatchDagPredicateDependencyEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "GIMatchDagPredicate.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
index 9552adc5c625..af91afc6073d 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
@@ -9,12 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
-#include "GIMatchDagOperands.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
-class GIMatchDag;
class GIMatchDagInstr;
class GIMatchDagPredicate;
+class GIMatchDagOperand;
+
+class raw_ostream;
/// Represents a dependency that must be met to evaluate a predicate.
///
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
index 00d57404b069..42055ad4f608 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "GIMatchTree.h"
+#include "GIMatchDagPredicate.h"
#include "../CodeGenInstruction.h"
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 25bc0adc2a81..018aa7ee2f71 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -32,7 +32,6 @@
#include "CodeGenDAGPatterns.h"
#include "SubtargetFeatureInfo.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
@@ -668,7 +667,6 @@ MatchTable &operator<<(MatchTable &Table, const MatchTableRecord &Value) {
class OperandMatcher;
class MatchAction;
class PredicateMatcher;
-class RuleMatcher;
class Matcher {
public:
diff --git a/llvm/utils/TableGen/InfoByHwMode.cpp b/llvm/utils/TableGen/InfoByHwMode.cpp
index 3d236b828032..73c4fbf0a5eb 100644
--- a/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <set>
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/InfoByHwMode.h b/llvm/utils/TableGen/InfoByHwMode.h
index c97add687ca2..44927d0bf0df 100644
--- a/llvm/utils/TableGen/InfoByHwMode.h
+++ b/llvm/utils/TableGen/InfoByHwMode.h
@@ -20,11 +20,9 @@
#include <map>
#include <string>
-#include <vector>
namespace llvm {
-struct CodeGenHwModes;
class Record;
class raw_ostream;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index f4e5eb59cb80..a5aa4069e60f 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index 0809432dfd0d..d54132f3190b 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
#include <cstring>
#include <map>
#include <memory>
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 5e44d033109a..11d896229f5b 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -8,15 +8,8 @@
#include "OptEmitter.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
-#include <cstring>
-#include <map>
using namespace llvm;
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 9e7a4a3925ac..27f049a715aa 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -17,12 +17,12 @@
#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Record.h"
+#include <vector>
namespace llvm {
class raw_ostream;
+class Record;
class PredicateExpander {
bool EmitCallsByRef;
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index 61f71309b6fb..d97d7acb87a7 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -17,7 +17,6 @@
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenHwModes.h"
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 327b90d59db6..dc5c96c662be 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -16,9 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
@@ -32,8 +29,6 @@ using namespace llvm;
namespace {
-struct GenericTable;
-
int getAsInt(Init *B) {
return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
}
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 24c11c8bc831..2d4a45f889be 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -289,7 +289,8 @@ int main(int argc, char **argv) {
#define __has_feature(x) 0
#endif
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \
+#if __has_feature(address_sanitizer) || \
+ (defined(__SANITIZE_ADDRESS__) && defined(__GNUC__)) || \
__has_feature(leak_sanitizer)
#include <sanitizer/lsan_interface.h>
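
The sanitizer guard gets stricter here: Clang advertises ASan through __has_feature(address_sanitizer), while GCC defines __SANITIZE_ADDRESS__. Requiring __GNUC__ alongside the macro presumably keeps other compilers that also define __SANITIZE_ADDRESS__ (MSVC's /fsanitize=address does, without shipping <sanitizer/lsan_interface.h>) off this include path. The detection idiom in isolation:

#include <cstdio>

// Pre-Clang compilers have no __has_feature, so default it to 0.
#ifndef __has_feature
#define __has_feature(x) 0
#endif

// Clang path: __has_feature. GCC path: __SANITIZE_ADDRESS__, but only
// when __GNUC__ confirms a GCC-compatible front end.
#if __has_feature(address_sanitizer) || \
    (defined(__SANITIZE_ADDRESS__) && defined(__GNUC__))
#define BUILDING_WITH_ASAN 1
#else
#define BUILDING_WITH_ASAN 0
#endif

int main() {
  std::printf("ASan detected: %d\n", BUILDING_WITH_ASAN);
  return 0;
}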
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 7518b262e6e9..74969053f095 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -14,6 +14,9 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyDisassemblerEmitter.h"
+#include "CodeGenInstruction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
namespace llvm {
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index 60d3d9433eca..aba3a4bfd302 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -14,12 +14,13 @@
#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
-#include "CodeGenInstruction.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
+class raw_ostream;
+
void emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
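
This header/impl split is instructive: the declaration of emitWebAssemblyDisassemblerTables traffics only in a reference and in pointers (ArrayRef<const CodeGenInstruction *>), so forward declarations of CodeGenInstruction and raw_ostream suffice, and the full includes migrate to the .cpp hunk above, where the bodies live. A sketch of why pointer-element containers tolerate incomplete types, with std::vector standing in for ArrayRef and hypothetical names throughout:

#include <vector>

class Instruction; // incomplete: never dereferenced in the "header" half

// A pointer to an incomplete type is itself a complete type, so the
// container and this declaration need nothing more than the name.
void emitTables(const std::vector<const Instruction *> &Instrs);

class Instruction {}; // ".cpp" half completes the type

void emitTables(const std::vector<const Instruction *> &Instrs) {
  (void)Instrs.size(); // real code would dereference the elements here
}

int main() {
  Instruction I;
  std::vector<const Instruction *> V{&I};
  emitTables(V);
  return 0;
}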
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 90e71a354d17..81ddea99740d 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -15,9 +15,12 @@
#include "X86DisassemblerTables.h"
#include "X86DisassemblerShared.h"
-#include "llvm/ADT/STLExtras.h"
+#include "X86ModRMFilters.h"
+#include "llvm/ADT/STLArrayExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
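
Note the swap from STLExtras.h to the new, much smaller STLArrayExtras.h (35 lines per the diffstat), which this commit splits out so that users of the array helpers stop pulling in all of STLExtras.h; llvm::array_lengthof is the typical client. A standalone copy of the helper's shape, for illustration only:

#include <cstddef>

// Same shape as llvm::array_lengthof: deduce a C array's extent at
// compile time from the reference-to-array parameter type.
template <typename T, std::size_t N>
constexpr std::size_t array_lengthof(T (&)[N]) {
  return N;
}

static const char *Names[] = {"MOVDQA", "MOVAPS", "MOVAPD"};
static_assert(array_lengthof(Names) == 3, "extent comes from the type");

int main() { return static_cast<int>(array_lengthof(Names)) - 3; }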
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h
index 2e4ff1e2ce08..966f7406efec 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.h
+++ b/llvm/utils/TableGen/X86DisassemblerTables.h
@@ -17,15 +17,18 @@
#define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H
#include "X86DisassemblerShared.h"
-#include "X86ModRMFilters.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
#include <map>
+#include <memory>
#include <vector>
namespace llvm {
+class raw_ostream;
namespace X86Disassembler {
+class ModRMFilter;
+
/// DisassemblerTables - Encapsulates all the decode tables being generated by
/// the table emitter. Contains functions to populate the tables as well as
/// to emit them as hierarchical C structures suitable for consumption by the
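
Two details in this hunk generalize. First, the forward declaration sits inside the nested X86Disassembler namespace, because a declaration must appear in the namespace the name actually lives in. Second, <memory> arrives alongside it; a plausible reason the two travel together (hedged, since the diff does not show the affected members) is that std::unique_ptr<T> tolerates an incomplete T as long as the owner's destructor is defined where T is complete. A sketch of that pattern with hypothetical names:

#include <memory>

namespace demo {
class Filter; // incomplete here, like ModRMFilter in the header above

class Tables {
  std::unique_ptr<Filter> F; // legal with an incomplete Filter
public:
  Tables();
  ~Tables(); // declared only; defining it here would need ~Filter
};
} // namespace demo

// ".cpp" half: complete the type, then define the special members.
namespace demo {
class Filter {};
Tables::Tables() : F(new Filter) {}
Tables::~Tables() = default; // Filter is complete, so deletion is safe
} // namespace demo

int main() {
  demo::Tables T;
  return 0;
}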
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 0a8d0750cf13..2a29331eb7e8 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -40,8 +40,6 @@ struct ManualMapEntry {
: RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
};
-class IsMatch;
-
// List of instructions requiring explicitly aligned memory.
const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",
"MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a9b384155965..4023d8f57318 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -15,8 +15,10 @@
#include "X86RecognizableInstr.h"
#include "X86DisassemblerShared.h"
+#include "X86DisassemblerTables.h"
#include "X86ModRMFilters.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index d4fad2cc3f0f..8f557d9ee5f5 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -16,13 +16,16 @@
#ifndef LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
#define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
-#include "CodeGenTarget.h"
-#include "X86DisassemblerTables.h"
+#include "CodeGenInstruction.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+
+struct InstructionSpecifier;
namespace llvm {
+class Record;
+
#define X86_INSTR_MRM_MAPPING \
MAP(C0, 64) \
MAP(C1, 65) \
@@ -153,6 +156,8 @@ namespace X86Local {
namespace X86Disassembler {
+class DisassemblerTables;
+
/// RecognizableInstr - Encapsulates all information required to decode a single
/// instruction, as extracted from the LLVM instruction tables. Has methods
/// to interpret the information available in the LLVM tables, and to emit the